2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								""" codecs -- Python Codec Registry, API and helpers.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Written by Marc-Andre Lemburg (mal@lemburg.com).
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								"""#"
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2002-12-12 17:37:50 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								import __builtin__, sys
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								### Registry and builtin stateless codec functions
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-31 17:25:23 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								try:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    from _codecs import *
							 | 
						
					
						
							
								
									
										
										
										
											2001-05-15 17:19:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								except ImportError, why:
							 | 
						
					
						
							
								
									
										
										
										
											2006-03-18 16:35:17 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    raise SystemError('Failed to load the builtin codecs: %s' % why)
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-05-15 17:19:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								__all__ = ["register", "lookup", "open", "EncodedFile", "BOM", "BOM_BE",
							 | 
						
					
						
							
								
									
										
										
										
											2002-06-04 15:16:29 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								           "BOM_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								           "BOM_UTF8", "BOM_UTF16", "BOM_UTF16_LE", "BOM_UTF16_BE",
							 | 
						
					
						
							
								
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								           "BOM_UTF32", "BOM_UTF32_LE", "BOM_UTF32_BE",
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								           "strict_errors", "ignore_errors", "replace_errors",
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								           "xmlcharrefreplace_errors",
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								           "register_error", "lookup_error"]
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-20 19:54:20 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								### Constants
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#
							 | 
						
					
						
							
								
									
										
										
										
											2002-06-04 15:16:29 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								# Byte Order Mark (BOM = ZERO WIDTH NO-BREAK SPACE = U+FEFF)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# and its possible byte string values
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# for UTF8/UTF16/UTF32 output and little/big endian machines
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2002-06-04 15:16:29 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								# UTF-8
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								BOM_UTF8 = '\xef\xbb\xbf'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# UTF-16, little endian
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								BOM_LE = BOM_UTF16_LE = '\xff\xfe'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# UTF-16, big endian
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								BOM_BE = BOM_UTF16_BE = '\xfe\xff'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# UTF-32, little endian
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								BOM_UTF32_LE = '\xff\xfe\x00\x00'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# UTF-32, big endian
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								BOM_UTF32_BE = '\x00\x00\xfe\xff'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2002-12-12 17:37:50 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								if sys.byteorder == 'little':
							 | 
						
					
						
							
								
									
										
										
										
											2002-06-04 15:16:29 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2002-12-12 17:37:50 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    # UTF-16, native endianness
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    BOM = BOM_UTF16 = BOM_UTF16_LE
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # UTF-32, native endianness
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    BOM_UTF32 = BOM_UTF32_LE
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								else:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # UTF-16, native endianness
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    BOM = BOM_UTF16 = BOM_UTF16_BE
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # UTF-32, native endianness
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    BOM_UTF32 = BOM_UTF32_BE
							 | 
						
					
						
							
								
									
										
										
										
											2002-06-04 15:16:29 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# Old broken names (don't use in new code)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								BOM32_LE = BOM_UTF16_LE
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								BOM32_BE = BOM_UTF16_BE
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								BOM64_LE = BOM_UTF32_LE
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								BOM64_BE = BOM_UTF32_BE
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								### Codec base classes (defining the API)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2006-03-15 11:35:15 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								class CodecInfo(tuple):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        incrementalencoder=None, incrementaldecoder=None, name=None):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.name = name
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.encode = encode
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.decode = decode
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.incrementalencoder = incrementalencoder
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.incrementaldecoder = incrementaldecoder
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.streamwriter = streamwriter
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.streamreader = streamreader
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return self
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def __repr__(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return "<%s.%s object for encoding %s at 0x%x>" % (self.__class__.__module__, self.__class__.__name__, self.name, id(self))
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								class Codec:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """ Defines the interface for stateless encoders/decoders.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2002-11-19 21:42:53 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        The .encode()/.decode() methods may use different error
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        handling schemes by providing the errors argument. These
							 | 
						
					
						
							
								
									
										
										
										
											2002-11-19 21:42:53 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        string values are predefined:
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-24 22:14:19 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								         'strict' - raise a ValueError error (or a subclass)
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								         'ignore' - ignore the character and continue with the next
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								         'replace' - replace with a suitable replacement character;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    Python will use the official U+FFFD REPLACEMENT
							 | 
						
					
						
							
								
									
										
										
										
											2002-11-19 21:42:53 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                    CHARACTER for the builtin Unicode codecs on
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    decoding and '?' on encoding.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								         'xmlcharrefreplace' - Replace with the appropriate XML
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                               character reference (only for encoding).
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								         'backslashreplace'  - Replace with backslashed escape sequences
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                               (only for encoding).
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        The set of allowed values can be extended via register_error.
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							
								
									
										
										
										
											2001-05-15 17:19:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def encode(self, input, errors='strict'):
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:41:38 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-17 15:40:35 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        """ Encodes the object input and returns a tuple (output
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            object, length consumed).
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            errors defines the error handling to apply. It defaults to
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            'strict' handling.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            The method may not store state in the Codec instance. Use
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            StreamCodec for codecs which have to keep state in order to
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            make encoding/decoding efficient.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            The encoder must be able to handle zero length input and
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            return an empty object of the output object type in this
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            situation.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        raise NotImplementedError
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-05-15 17:19:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def decode(self, input, errors='strict'):
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """ Decodes the object input and returns a tuple (output
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            object, length consumed).
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            input must be an object which provides the bf_getreadbuf
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            buffer slot. Python strings, buffer objects and memory
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            mapped files are examples of objects providing this slot.
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:41:38 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            errors defines the error handling to apply. It defaults to
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            'strict' handling.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            The method may not store state in the Codec instance. Use
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            StreamCodec for codecs which have to keep state in order to
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            make encoding/decoding efficient.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            The decoder must be able to handle zero length input and
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            return an empty object of the output object type in this
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            situation.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:41:38 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        raise NotImplementedError
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2006-03-15 11:35:15 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								class IncrementalEncoder(object):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							
								
									
										
										
										
											2006-03-16 07:49:19 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    An IncrementalEncoder encodes an input in multiple steps. The input can be
							 | 
						
					
						
							
								
									
										
										
										
											2006-03-15 11:35:15 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    passed piece by piece to the encode() method. The IncrementalEncoder remembers
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    the state of the Encoding process between calls to encode().
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def __init__(self, errors='strict'):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							
								
									
										
										
										
											2006-03-16 07:49:19 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        Creates an IncrementalEncoder instance.
							 | 
						
					
						
							
								
									
										
										
										
											2006-03-15 11:35:15 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        The IncrementalEncoder may use different error handling schemes by
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        providing the errors keyword argument. See the module docstring
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        for a list of possible values.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.errors = errors
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.buffer = ""
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def encode(self, input, final=False):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        Encodes input and returns the resulting object.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        raise NotImplementedError
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def reset(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        Resets the encoder to the initial state.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
											 
										 
										
											
												Merged revisions 61750,61752,61754,61756,61760,61763,61768,61772,61775,61805,61809,61812,61819,61917,61920,61930,61933-61934 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/trunk-bytearray
........
  r61750 | christian.heimes | 2008-03-22 20:47:44 +0100 (Sat, 22 Mar 2008) | 1 line
  Copied files from py3k w/o modifications
........
  r61752 | christian.heimes | 2008-03-22 20:53:20 +0100 (Sat, 22 Mar 2008) | 7 lines
  Take One
  * Added initialization code, warnings, flags etc. to the appropriate places
  * Added new buffer interface to string type
  * Modified tests
  * Modified Makefile.pre.in to compile the new files
  * Added bytesobject.c to Python.h
........
  r61754 | christian.heimes | 2008-03-22 21:22:19 +0100 (Sat, 22 Mar 2008) | 2 lines
  Disabled bytearray.extend for now since it causes an infinite recursion
  Fixed serveral unit tests
........
  r61756 | christian.heimes | 2008-03-22 21:43:38 +0100 (Sat, 22 Mar 2008) | 5 lines
  Added PyBytes support to several places:
  str + bytearray
  ord(bytearray)
  bytearray(str, encoding)
........
  r61760 | christian.heimes | 2008-03-22 21:56:32 +0100 (Sat, 22 Mar 2008) | 1 line
  Fixed more unit tests related to type('') is not unicode
........
  r61763 | christian.heimes | 2008-03-22 22:20:28 +0100 (Sat, 22 Mar 2008) | 2 lines
  Fixed more unit tests
  Fixed bytearray.extend
........
  r61768 | christian.heimes | 2008-03-22 22:40:50 +0100 (Sat, 22 Mar 2008) | 1 line
  Implemented old buffer interface for bytearray
........
  r61772 | christian.heimes | 2008-03-22 23:24:52 +0100 (Sat, 22 Mar 2008) | 1 line
  Added backport of the io module
........
  r61775 | christian.heimes | 2008-03-23 03:50:49 +0100 (Sun, 23 Mar 2008) | 1 line
  Fix str assignement to bytearray. Assignment of a str of size 1 is interpreted as a single byte
........
  r61805 | christian.heimes | 2008-03-23 19:33:48 +0100 (Sun, 23 Mar 2008) | 3 lines
  Fixed more tests
  Fixed bytearray() comparsion with unicode()
  Fixed iterator assignment of bytearray
........
  r61809 | christian.heimes | 2008-03-23 21:02:21 +0100 (Sun, 23 Mar 2008) | 2 lines
  str(bytesarray()) now returns the bytes and not the representation of the bytearray object
  Enabled and fixed more unit tests
........
  r61812 | christian.heimes | 2008-03-23 21:53:08 +0100 (Sun, 23 Mar 2008) | 3 lines
  Clear error PyNumber_AsSsize_t() fails
  Use CHARMASK for ob_svall access
  disabled a test with memoryview again
........
  r61819 | christian.heimes | 2008-03-23 23:05:57 +0100 (Sun, 23 Mar 2008) | 1 line
  Untested updates to the PCBuild directory
........
  r61917 | christian.heimes | 2008-03-26 00:57:06 +0100 (Wed, 26 Mar 2008) | 1 line
  The type system of Python 2.6 has subtle differences to 3.0's. I've removed the Py_TPFLAGS_BASETYPE flags from bytearray for now. bytearray can't be subclasses until the issues with bytearray subclasses are fixed.
........
  r61920 | christian.heimes | 2008-03-26 01:44:08 +0100 (Wed, 26 Mar 2008) | 2 lines
  Disabled last failing test
  I don't understand what the test is testing and how it suppose to work. Ka-Ping, please check it out.
........
  r61930 | christian.heimes | 2008-03-26 12:46:18 +0100 (Wed, 26 Mar 2008) | 1 line
  Re-enabled bytes warning code
........
  r61933 | christian.heimes | 2008-03-26 13:20:46 +0100 (Wed, 26 Mar 2008) | 1 line
  Fixed a bug in the new buffer protocol. The buffer slots weren't copied into a subclass.
........
  r61934 | christian.heimes | 2008-03-26 13:25:09 +0100 (Wed, 26 Mar 2008) | 1 line
  Re-enabled bytearray subclassing - all tests are passing.
........
											
										 
										
											2008-03-26 12:49:49 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def getstate(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        Return the current state of the encoder.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return 0
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def setstate(self, state):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        Set the current state of the encoder. state must have been
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        returned by getstate().
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2006-04-14 18:25:39 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								class BufferedIncrementalEncoder(IncrementalEncoder):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    This subclass of IncrementalEncoder can be used as the baseclass for an
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    incremental encoder if the encoder must keep some of the output in a
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    buffer between calls to encode().
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def __init__(self, errors='strict'):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        IncrementalEncoder.__init__(self, errors)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.buffer = "" # unencoded input that is kept between calls to encode()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def _buffer_encode(self, input, errors, final):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # Overwrite this method in subclasses: It must encode input
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # and return an (output, length consumed) tuple
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        raise NotImplementedError
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def encode(self, input, final=False):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # encode input (taking the buffer into account)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        data = self.buffer + input
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        (result, consumed) = self._buffer_encode(data, self.errors, final)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # keep unencoded input until the next call
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.buffer = data[consumed:]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return result
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def reset(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        IncrementalEncoder.reset(self)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.buffer = ""
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
											 
										 
										
											
												Merged revisions 61750,61752,61754,61756,61760,61763,61768,61772,61775,61805,61809,61812,61819,61917,61920,61930,61933-61934 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/trunk-bytearray
........
  r61750 | christian.heimes | 2008-03-22 20:47:44 +0100 (Sat, 22 Mar 2008) | 1 line
  Copied files from py3k w/o modifications
........
  r61752 | christian.heimes | 2008-03-22 20:53:20 +0100 (Sat, 22 Mar 2008) | 7 lines
  Take One
  * Added initialization code, warnings, flags etc. to the appropriate places
  * Added new buffer interface to string type
  * Modified tests
  * Modified Makefile.pre.in to compile the new files
  * Added bytesobject.c to Python.h
........
  r61754 | christian.heimes | 2008-03-22 21:22:19 +0100 (Sat, 22 Mar 2008) | 2 lines
  Disabled bytearray.extend for now since it causes an infinite recursion
  Fixed serveral unit tests
........
  r61756 | christian.heimes | 2008-03-22 21:43:38 +0100 (Sat, 22 Mar 2008) | 5 lines
  Added PyBytes support to several places:
  str + bytearray
  ord(bytearray)
  bytearray(str, encoding)
........
  r61760 | christian.heimes | 2008-03-22 21:56:32 +0100 (Sat, 22 Mar 2008) | 1 line
  Fixed more unit tests related to type('') is not unicode
........
  r61763 | christian.heimes | 2008-03-22 22:20:28 +0100 (Sat, 22 Mar 2008) | 2 lines
  Fixed more unit tests
  Fixed bytearray.extend
........
  r61768 | christian.heimes | 2008-03-22 22:40:50 +0100 (Sat, 22 Mar 2008) | 1 line
  Implemented old buffer interface for bytearray
........
  r61772 | christian.heimes | 2008-03-22 23:24:52 +0100 (Sat, 22 Mar 2008) | 1 line
  Added backport of the io module
........
  r61775 | christian.heimes | 2008-03-23 03:50:49 +0100 (Sun, 23 Mar 2008) | 1 line
  Fix str assignement to bytearray. Assignment of a str of size 1 is interpreted as a single byte
........
  r61805 | christian.heimes | 2008-03-23 19:33:48 +0100 (Sun, 23 Mar 2008) | 3 lines
  Fixed more tests
  Fixed bytearray() comparsion with unicode()
  Fixed iterator assignment of bytearray
........
  r61809 | christian.heimes | 2008-03-23 21:02:21 +0100 (Sun, 23 Mar 2008) | 2 lines
  str(bytesarray()) now returns the bytes and not the representation of the bytearray object
  Enabled and fixed more unit tests
........
  r61812 | christian.heimes | 2008-03-23 21:53:08 +0100 (Sun, 23 Mar 2008) | 3 lines
  Clear error PyNumber_AsSsize_t() fails
  Use CHARMASK for ob_svall access
  disabled a test with memoryview again
........
  r61819 | christian.heimes | 2008-03-23 23:05:57 +0100 (Sun, 23 Mar 2008) | 1 line
  Untested updates to the PCBuild directory
........
  r61917 | christian.heimes | 2008-03-26 00:57:06 +0100 (Wed, 26 Mar 2008) | 1 line
  The type system of Python 2.6 has subtle differences to 3.0's. I've removed the Py_TPFLAGS_BASETYPE flags from bytearray for now. bytearray can't be subclasses until the issues with bytearray subclasses are fixed.
........
  r61920 | christian.heimes | 2008-03-26 01:44:08 +0100 (Wed, 26 Mar 2008) | 2 lines
  Disabled last failing test
  I don't understand what the test is testing and how it suppose to work. Ka-Ping, please check it out.
........
  r61930 | christian.heimes | 2008-03-26 12:46:18 +0100 (Wed, 26 Mar 2008) | 1 line
  Re-enabled bytes warning code
........
  r61933 | christian.heimes | 2008-03-26 13:20:46 +0100 (Wed, 26 Mar 2008) | 1 line
  Fixed a bug in the new buffer protocol. The buffer slots weren't copied into a subclass.
........
  r61934 | christian.heimes | 2008-03-26 13:25:09 +0100 (Wed, 26 Mar 2008) | 1 line
  Re-enabled bytearray subclassing - all tests are passing.
........
											
										 
										
											2008-03-26 12:49:49 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def getstate(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return self.buffer or 0
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def setstate(self, state):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.buffer = state or ""
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2006-03-15 11:35:15 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								class IncrementalDecoder(object):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    An IncrementalDecoder decodes an input in multiple steps. The input can be
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    passed piece by piece to the decode() method. The IncrementalDecoder
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    remembers the state of the decoding process between calls to decode().
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def __init__(self, errors='strict'):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        Creates a IncrementalDecoder instance.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        The IncrementalDecoder may use different error handling schemes by
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        providing the errors keyword argument. See the module docstring
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        for a list of possible values.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.errors = errors
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def decode(self, input, final=False):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        Decodes input and returns the resulting object.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        raise NotImplementedError
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def reset(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        Resets the decoder to the initial state.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
											 
										 
										
											
												Merged revisions 61750,61752,61754,61756,61760,61763,61768,61772,61775,61805,61809,61812,61819,61917,61920,61930,61933-61934 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/trunk-bytearray
........
  r61750 | christian.heimes | 2008-03-22 20:47:44 +0100 (Sat, 22 Mar 2008) | 1 line
  Copied files from py3k w/o modifications
........
  r61752 | christian.heimes | 2008-03-22 20:53:20 +0100 (Sat, 22 Mar 2008) | 7 lines
  Take One
  * Added initialization code, warnings, flags etc. to the appropriate places
  * Added new buffer interface to string type
  * Modified tests
  * Modified Makefile.pre.in to compile the new files
  * Added bytesobject.c to Python.h
........
  r61754 | christian.heimes | 2008-03-22 21:22:19 +0100 (Sat, 22 Mar 2008) | 2 lines
  Disabled bytearray.extend for now since it causes an infinite recursion
  Fixed serveral unit tests
........
  r61756 | christian.heimes | 2008-03-22 21:43:38 +0100 (Sat, 22 Mar 2008) | 5 lines
  Added PyBytes support to several places:
  str + bytearray
  ord(bytearray)
  bytearray(str, encoding)
........
  r61760 | christian.heimes | 2008-03-22 21:56:32 +0100 (Sat, 22 Mar 2008) | 1 line
  Fixed more unit tests related to type('') is not unicode
........
  r61763 | christian.heimes | 2008-03-22 22:20:28 +0100 (Sat, 22 Mar 2008) | 2 lines
  Fixed more unit tests
  Fixed bytearray.extend
........
  r61768 | christian.heimes | 2008-03-22 22:40:50 +0100 (Sat, 22 Mar 2008) | 1 line
  Implemented old buffer interface for bytearray
........
  r61772 | christian.heimes | 2008-03-22 23:24:52 +0100 (Sat, 22 Mar 2008) | 1 line
  Added backport of the io module
........
  r61775 | christian.heimes | 2008-03-23 03:50:49 +0100 (Sun, 23 Mar 2008) | 1 line
  Fix str assignement to bytearray. Assignment of a str of size 1 is interpreted as a single byte
........
  r61805 | christian.heimes | 2008-03-23 19:33:48 +0100 (Sun, 23 Mar 2008) | 3 lines
  Fixed more tests
  Fixed bytearray() comparsion with unicode()
  Fixed iterator assignment of bytearray
........
  r61809 | christian.heimes | 2008-03-23 21:02:21 +0100 (Sun, 23 Mar 2008) | 2 lines
  str(bytesarray()) now returns the bytes and not the representation of the bytearray object
  Enabled and fixed more unit tests
........
  r61812 | christian.heimes | 2008-03-23 21:53:08 +0100 (Sun, 23 Mar 2008) | 3 lines
  Clear error PyNumber_AsSsize_t() fails
  Use CHARMASK for ob_svall access
  disabled a test with memoryview again
........
  r61819 | christian.heimes | 2008-03-23 23:05:57 +0100 (Sun, 23 Mar 2008) | 1 line
  Untested updates to the PCBuild directory
........
  r61917 | christian.heimes | 2008-03-26 00:57:06 +0100 (Wed, 26 Mar 2008) | 1 line
  The type system of Python 2.6 has subtle differences to 3.0's. I've removed the Py_TPFLAGS_BASETYPE flags from bytearray for now. bytearray can't be subclasses until the issues with bytearray subclasses are fixed.
........
  r61920 | christian.heimes | 2008-03-26 01:44:08 +0100 (Wed, 26 Mar 2008) | 2 lines
  Disabled last failing test
  I don't understand what the test is testing and how it suppose to work. Ka-Ping, please check it out.
........
  r61930 | christian.heimes | 2008-03-26 12:46:18 +0100 (Wed, 26 Mar 2008) | 1 line
  Re-enabled bytes warning code
........
  r61933 | christian.heimes | 2008-03-26 13:20:46 +0100 (Wed, 26 Mar 2008) | 1 line
  Fixed a bug in the new buffer protocol. The buffer slots weren't copied into a subclass.
........
  r61934 | christian.heimes | 2008-03-26 13:25:09 +0100 (Wed, 26 Mar 2008) | 1 line
  Re-enabled bytearray subclassing - all tests are passing.
........
											
										 
										
											2008-03-26 12:49:49 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def getstate(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        Return the current state of the decoder.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        This must be a (buffered_input, additional_state_info) tuple.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        buffered_input must be a bytes object containing bytes that
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        were passed to decode() that have not yet been converted.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        additional_state_info must be a non-negative integer
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        representing the state of the decoder WITHOUT yet having
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        processed the contents of buffered_input.  In the initial state
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        and after reset(), getstate() must return (b"", 0).
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return (b"", 0)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def setstate(self, state):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        Set the current state of the decoder.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        state must have been returned by getstate().  The effect of
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        setstate((b"", 0)) must be equivalent to reset().
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2006-03-15 11:35:15 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								class BufferedIncrementalDecoder(IncrementalDecoder):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    This subclass of IncrementalDecoder can be used as the baseclass for an
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    incremental decoder if the decoder must be able to handle incomplete byte
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    sequences.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def __init__(self, errors='strict'):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        IncrementalDecoder.__init__(self, errors)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.buffer = "" # undecoded input that is kept between calls to decode()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def _buffer_decode(self, input, errors, final):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # Overwrite this method in subclasses: It must decode input
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # and return an (output, length consumed) tuple
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        raise NotImplementedError
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def decode(self, input, final=False):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # decode input (taking the buffer into account)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        data = self.buffer + input
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        (result, consumed) = self._buffer_decode(data, self.errors, final)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # keep undecoded input until the next call
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.buffer = data[consumed:]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return result
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def reset(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        IncrementalDecoder.reset(self)
							 | 
						
					
						
							
								
									
										
										
										
											2006-04-14 15:40:54 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        self.buffer = ""
							 | 
						
					
						
							
								
									
										
										
										
											2006-03-15 11:35:15 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
											 
										 
										
											
												Merged revisions 61750,61752,61754,61756,61760,61763,61768,61772,61775,61805,61809,61812,61819,61917,61920,61930,61933-61934 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/trunk-bytearray
........
  r61750 | christian.heimes | 2008-03-22 20:47:44 +0100 (Sat, 22 Mar 2008) | 1 line
  Copied files from py3k w/o modifications
........
  r61752 | christian.heimes | 2008-03-22 20:53:20 +0100 (Sat, 22 Mar 2008) | 7 lines
  Take One
  * Added initialization code, warnings, flags etc. to the appropriate places
  * Added new buffer interface to string type
  * Modified tests
  * Modified Makefile.pre.in to compile the new files
  * Added bytesobject.c to Python.h
........
  r61754 | christian.heimes | 2008-03-22 21:22:19 +0100 (Sat, 22 Mar 2008) | 2 lines
  Disabled bytearray.extend for now since it causes an infinite recursion
  Fixed serveral unit tests
........
  r61756 | christian.heimes | 2008-03-22 21:43:38 +0100 (Sat, 22 Mar 2008) | 5 lines
  Added PyBytes support to several places:
  str + bytearray
  ord(bytearray)
  bytearray(str, encoding)
........
  r61760 | christian.heimes | 2008-03-22 21:56:32 +0100 (Sat, 22 Mar 2008) | 1 line
  Fixed more unit tests related to type('') is not unicode
........
  r61763 | christian.heimes | 2008-03-22 22:20:28 +0100 (Sat, 22 Mar 2008) | 2 lines
  Fixed more unit tests
  Fixed bytearray.extend
........
  r61768 | christian.heimes | 2008-03-22 22:40:50 +0100 (Sat, 22 Mar 2008) | 1 line
  Implemented old buffer interface for bytearray
........
  r61772 | christian.heimes | 2008-03-22 23:24:52 +0100 (Sat, 22 Mar 2008) | 1 line
  Added backport of the io module
........
  r61775 | christian.heimes | 2008-03-23 03:50:49 +0100 (Sun, 23 Mar 2008) | 1 line
  Fix str assignement to bytearray. Assignment of a str of size 1 is interpreted as a single byte
........
  r61805 | christian.heimes | 2008-03-23 19:33:48 +0100 (Sun, 23 Mar 2008) | 3 lines
  Fixed more tests
  Fixed bytearray() comparsion with unicode()
  Fixed iterator assignment of bytearray
........
  r61809 | christian.heimes | 2008-03-23 21:02:21 +0100 (Sun, 23 Mar 2008) | 2 lines
  str(bytesarray()) now returns the bytes and not the representation of the bytearray object
  Enabled and fixed more unit tests
........
  r61812 | christian.heimes | 2008-03-23 21:53:08 +0100 (Sun, 23 Mar 2008) | 3 lines
  Clear error PyNumber_AsSsize_t() fails
  Use CHARMASK for ob_svall access
  disabled a test with memoryview again
........
  r61819 | christian.heimes | 2008-03-23 23:05:57 +0100 (Sun, 23 Mar 2008) | 1 line
  Untested updates to the PCBuild directory
........
  r61917 | christian.heimes | 2008-03-26 00:57:06 +0100 (Wed, 26 Mar 2008) | 1 line
  The type system of Python 2.6 has subtle differences to 3.0's. I've removed the Py_TPFLAGS_BASETYPE flags from bytearray for now. bytearray can't be subclasses until the issues with bytearray subclasses are fixed.
........
  r61920 | christian.heimes | 2008-03-26 01:44:08 +0100 (Wed, 26 Mar 2008) | 2 lines
  Disabled last failing test
  I don't understand what the test is testing and how it suppose to work. Ka-Ping, please check it out.
........
  r61930 | christian.heimes | 2008-03-26 12:46:18 +0100 (Wed, 26 Mar 2008) | 1 line
  Re-enabled bytes warning code
........
  r61933 | christian.heimes | 2008-03-26 13:20:46 +0100 (Wed, 26 Mar 2008) | 1 line
  Fixed a bug in the new buffer protocol. The buffer slots weren't copied into a subclass.
........
  r61934 | christian.heimes | 2008-03-26 13:25:09 +0100 (Wed, 26 Mar 2008) | 1 line
  Re-enabled bytearray subclassing - all tests are passing.
........
											
										 
										
											2008-03-26 12:49:49 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def getstate(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # additional state info is always 0
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return (self.buffer, 0)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def setstate(self, state):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # ignore additional state info
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.buffer = state[0]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# The StreamWriter and StreamReader class provide generic working
							 | 
						
					
						
							
								
									
										
										
										
											2001-09-18 20:29:48 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								# interfaces which can be used to implement new encoding submodules
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# very easily. See encodings/utf_8.py for an example on how this is
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# done.
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:41:38 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								#
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								class StreamWriter(Codec):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-05-15 17:19:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def __init__(self, stream, errors='strict'):
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """ Creates a StreamWriter instance.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            stream must be a file-like object open for writing
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            (binary) data.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2002-11-19 21:42:53 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            The StreamWriter may use different error handling
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            schemes by providing the errors keyword argument. These
							 | 
						
					
						
							
								
									
										
										
										
											2002-11-19 21:42:53 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            parameters are predefined:
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								             'strict' - raise a ValueError (or a subclass)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								             'ignore' - ignore the character and continue with the next
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								             'replace'- replace with a suitable replacement character
							 | 
						
					
						
							
								
									
										
										
										
											2002-11-19 21:42:53 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								             'xmlcharrefreplace' - Replace with the appropriate XML
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                                   character reference.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								             'backslashreplace'  - Replace with backslashed escape
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                                   sequences (only for encoding).
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2002-11-19 21:42:53 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            The set of allowed parameter values can be extended via
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            register_error.
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.stream = stream
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.errors = errors
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:37:43 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def write(self, object):
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """ Writes the object's contents encoded to self.stream.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							
								
									
										
										
										
											2001-05-15 17:19:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        data, consumed = self.encode(object, self.errors)
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.stream.write(data)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:37:43 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def writelines(self, list):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """ Writes the concatenated list of strings to the stream
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            using .write().
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.write(''.join(list))
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:41:38 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def reset(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """ Flushes and resets the codec buffers used for keeping state.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            Calling this method should ensure that the data on the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            output is put into a clean state, that allows appending
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            of new fresh data without having to rescan the whole
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            stream to recover state.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        pass
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-05-15 17:19:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def __getattr__(self, name,
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    getattr=getattr):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """ Inherit all other methods from the underlying stream.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							
								
									
										
										
										
											2001-05-15 17:19:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return getattr(self.stream, name)
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2006-10-29 08:39:22 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def __enter__(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return self
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def __exit__(self, type, value, tb):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.stream.close()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								###
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								class StreamReader(Codec):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-05-15 17:19:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def __init__(self, stream, errors='strict'):
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """ Creates a StreamReader instance.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            stream must be a file-like object open for reading
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            (binary) data.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2002-11-19 21:42:53 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            The StreamReader may use different error handling
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            schemes by providing the errors keyword argument. These
							 | 
						
					
						
							
								
									
										
										
										
											2002-11-19 21:42:53 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            parameters are predefined:
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								             'strict' - raise a ValueError (or a subclass)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								             'ignore' - ignore the character and continue with the next
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								             'replace'- replace with a suitable replacement character;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2002-11-19 21:42:53 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            The set of allowed parameter values can be extended via
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            register_error.
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.stream = stream
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.errors = errors
							 | 
						
					
						
							
								
									
										
										
										
											2004-09-07 20:24:22 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        self.bytebuffer = ""
							 | 
						
					
						
							
								
									
										
										
										
											2005-07-20 22:15:39 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        # For str->str decoding this will stay a str
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # For str->unicode decoding the first read will promote it to unicode
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.charbuffer = ""
							 | 
						
					
						
							
								
									
										
										
										
											2005-09-18 08:34:39 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        self.linebuffer = None
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2004-09-07 20:24:22 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def decode(self, input, errors='strict'):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        raise NotImplementedError
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2005-08-24 07:38:12 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def read(self, size=-1, chars=-1, firstline=False):
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """ Decodes data from the stream self.stream and returns the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            resulting object.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2004-09-07 20:24:22 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            chars indicates the number of characters to read from the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            stream. read() will never return more than chars
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            characters, but it might return less, if there are not enough
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            characters available.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            size indicates the approximate maximum number of bytes to
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            read from the stream for decoding purposes. The decoder
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            can modify this setting as appropriate. The default value
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            -1 indicates to read and decode as much as possible.  size
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            is intended to prevent having to decode huge files in one
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            step.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2005-08-24 07:38:12 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            If firstline is true, and a UnicodeDecodeError happens
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            after the first line terminator in the input only the first line
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            will be returned, the rest of the input will be kept until the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            next call to read().
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            The method should use a greedy read strategy meaning that
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            it should read as much data as is allowed within the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            definition of the encoding and the given size, e.g.  if
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            optional encoding endings or state markers are available
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            on the stream, these should be read too.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							
								
									
										
										
										
											2005-09-18 08:34:39 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        # If we have lines cached, first merge them back into characters
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if self.linebuffer:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            self.charbuffer = "".join(self.linebuffer)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            self.linebuffer = None
							 | 
						
					
						
							
								
									
										
										
										
											2005-12-25 23:18:31 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2004-09-07 20:24:22 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        # read until we get the required number of characters (if available)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        while True:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            # can the request can be satisfied from the character buffer?
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if chars < 0:
							 | 
						
					
						
							
								
									
										
										
										
											2006-03-06 22:39:12 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                if size < 0:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    if self.charbuffer:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                        break
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                elif len(self.charbuffer) >= size:
							 | 
						
					
						
							
								
									
										
										
										
											2004-12-21 22:24:00 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                    break
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            else:
							 | 
						
					
						
							
								
									
										
										
										
											2004-09-07 20:24:22 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                if len(self.charbuffer) >= chars:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    break
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            # we need more data
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if size < 0:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                newdata = self.stream.read()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            else:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                newdata = self.stream.read(size)
							 | 
						
					
						
							
								
									
										
										
										
											2004-12-21 22:24:00 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            # decode bytes (those remaining from the last call included)
							 | 
						
					
						
							
								
									
										
										
										
											2004-09-07 20:24:22 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            data = self.bytebuffer + newdata
							 | 
						
					
						
							
								
									
										
										
										
											2005-08-24 07:38:12 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            try:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                newchars, decodedbytes = self.decode(data, self.errors)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            except UnicodeDecodeError, exc:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                if firstline:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    newchars, decodedbytes = self.decode(data[:exc.start], self.errors)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    lines = newchars.splitlines(True)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    if len(lines)<=1:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                        raise
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                else:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    raise
							 | 
						
					
						
							
								
									
										
										
										
											2004-09-07 20:24:22 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            # keep undecoded bytes until the next call
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            self.bytebuffer = data[decodedbytes:]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            # put new characters in the character buffer
							 | 
						
					
						
							
								
									
										
										
										
											2004-12-21 22:24:00 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            self.charbuffer += newchars
							 | 
						
					
						
							
								
									
										
										
										
											2004-09-07 20:24:22 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            # there was no data available
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if not newdata:
							 | 
						
					
						
							
								
									
										
										
										
											2004-12-21 22:24:00 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                break
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if chars < 0:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            # Return everything we've got
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            result = self.charbuffer
							 | 
						
					
						
							
								
									
										
										
										
											2005-07-20 22:15:39 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            self.charbuffer = ""
							 | 
						
					
						
							
								
									
										
										
										
											2004-12-21 22:24:00 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        else:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            # Return the first chars characters
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            result = self.charbuffer[:chars]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            self.charbuffer = self.charbuffer[chars:]
							 | 
						
					
						
							
								
									
										
										
										
											2004-09-07 20:24:22 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return result
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def readline(self, size=None, keepends=True):
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:37:43 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """ Read one line from the input stream and return the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            decoded data.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2004-09-07 20:24:22 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            size, if given, is passed as size argument to the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            read() method.
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:41:38 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:37:43 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							
								
									
										
										
										
											2005-09-18 08:34:39 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        # If we have lines cached from an earlier read, return
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # them unconditionally
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if self.linebuffer:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            line = self.linebuffer[0]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            del self.linebuffer[0]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if len(self.linebuffer) == 1:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                # revert to charbuffer mode; we might need more data
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                # next time
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                self.charbuffer = self.linebuffer[0]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                self.linebuffer = None
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if not keepends:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                line = line.splitlines(False)[0]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            return line
							 | 
						
					
						
							
								
									
										
										
										
											2005-12-25 23:18:31 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2004-12-21 22:24:00 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        readsize = size or 72
							 | 
						
					
						
							
								
									
										
										
										
											2005-07-20 22:15:39 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        line = ""
							 | 
						
					
						
							
								
									
										
										
										
											2004-12-21 22:24:00 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        # If size is given, we call read() only once
							 | 
						
					
						
							
								
									
										
										
										
											2004-09-07 20:24:22 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        while True:
							 | 
						
					
						
							
								
									
										
										
										
											2005-08-24 07:38:12 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            data = self.read(readsize, firstline=True)
							 | 
						
					
						
							
								
									
										
										
										
											2004-12-21 22:24:00 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            if data:
							 | 
						
					
						
							
								
									
										
										
										
											2005-04-21 21:42:35 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                # If we're at a "\r" read one extra character (which might
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                # be a "\n") to get a proper line ending. If the stream is
							 | 
						
					
						
							
								
									
										
										
										
											2005-04-21 21:32:03 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                # temporarily exhausted we return the wrong line ending.
							 | 
						
					
						
							
								
									
										
										
										
											2005-07-20 22:15:39 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                if data.endswith("\r"):
							 | 
						
					
						
							
								
									
										
										
										
											2005-04-04 21:38:47 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                    data += self.read(size=1, chars=1)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2004-09-07 20:24:22 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            line += data
							 | 
						
					
						
							
								
									
										
										
										
											2004-12-21 22:24:00 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            lines = line.splitlines(True)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if lines:
							 | 
						
					
						
							
								
									
										
										
										
											2005-09-18 08:34:39 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                if len(lines) > 1:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    # More than one line result; the first line is a full line
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    # to return
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    line = lines[0]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    del lines[0]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    if len(lines) > 1:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                        # cache the remaining lines
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                        lines[-1] += self.charbuffer
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                        self.linebuffer = lines
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                        self.charbuffer = None
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    else:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                        # only one remaining line, put it back into charbuffer
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                        self.charbuffer = lines[0] + self.charbuffer
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    if not keepends:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                        line = line.splitlines(False)[0]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    break
							 | 
						
					
						
							
								
									
										
										
										
											2004-12-21 22:24:00 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                line0withend = lines[0]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                line0withoutend = lines[0].splitlines(False)[0]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                if line0withend != line0withoutend: # We really have a line end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    # Put the rest back together and keep it until the next call
							 | 
						
					
						
							
								
									
										
										
										
											2005-07-20 22:15:39 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                    self.charbuffer = "".join(lines[1:]) + self.charbuffer
							 | 
						
					
						
							
								
									
										
										
										
											2004-12-21 22:24:00 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                    if keepends:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                        line = line0withend
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    else:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                        line = line0withoutend
							 | 
						
					
						
							
								
									
										
										
										
											2005-01-10 12:01:39 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                    break
							 | 
						
					
						
							
								
									
										
										
										
											2004-12-21 22:24:00 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            # we didn't get anything or this was our only try
							 | 
						
					
						
							
								
									
										
										
										
											2005-01-10 12:01:39 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            if not data or size is not None:
							 | 
						
					
						
							
								
									
										
										
										
											2004-12-21 22:24:00 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                if line and not keepends:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    line = line.splitlines(False)[0]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                break
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if readsize<8000:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                readsize *= 2
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return line
							 | 
						
					
						
							
								
									
										
										
										
											2004-09-07 20:24:22 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def readlines(self, sizehint=None, keepends=True):
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:37:43 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """ Read all lines available on the input stream
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            and return them as list of lines.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            Line breaks are implemented using the codec's decoder
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            method and are included in the list entries.
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:41:38 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2004-02-26 15:22:17 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            sizehint, if given, is ignored since there is no efficient
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            way to finding the true end-of-line.
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:37:43 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							
								
									
										
										
										
											2004-09-07 20:24:22 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        data = self.read()
							 | 
						
					
						
							
								
									
										
										
										
											2004-10-17 23:51:21 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return data.splitlines(keepends)
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def reset(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """ Resets the codec buffers used for keeping state.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            Note that no stream repositioning should take place.
							 | 
						
					
						
							
								
									
										
										
										
											2000-07-16 12:04:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            This method is primarily intended to be able to recover
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            from decoding errors.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							
								
									
										
										
										
											2005-03-14 19:06:30 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        self.bytebuffer = ""
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.charbuffer = u""
							 | 
						
					
						
							
								
									
										
										
										
											2005-09-18 08:34:39 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        self.linebuffer = None
							 | 
						
					
						
							
								
									
										
										
										
											2005-03-14 19:06:30 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2005-03-14 19:25:41 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def seek(self, offset, whence=0):
							 | 
						
					
						
							
								
									
										
										
										
											2005-03-14 19:06:30 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        """ Set the input stream's current position.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            Resets the codec buffers used for keeping state.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.reset()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.stream.seek(offset, whence)
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2002-11-06 16:53:44 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def next(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """ Return the next decoded line from the input stream."""
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        line = self.readline()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if line:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            return line
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        raise StopIteration
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def __iter__(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return self
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-05-15 17:19:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def __getattr__(self, name,
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    getattr=getattr):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """ Inherit all other methods from the underlying stream.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							
								
									
										
										
										
											2001-05-15 17:19:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return getattr(self.stream, name)
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2006-10-29 08:39:22 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def __enter__(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return self
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def __exit__(self, type, value, tb):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.stream.close()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								###
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								class StreamReaderWriter:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-13 14:11:21 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    """ StreamReaderWriter instances allow wrapping streams which
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        work in both read and write modes.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        The design is such that one can use the factory functions
							 | 
						
					
						
							
								
									
										
										
										
											2000-07-16 12:04:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        returned by the codec.lookup() function to construct the
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-13 14:11:21 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        instance.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:37:43 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    # Optional attributes set by the file wrappers below
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    encoding = 'unknown'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-05-15 17:19:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def __init__(self, stream, Reader, Writer, errors='strict'):
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """ Creates a StreamReaderWriter instance.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            stream must be a Stream-like object.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            Reader, Writer must be factory functions or classes
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            providing the StreamReader, StreamWriter interface resp.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            Error handling is done in the same way as defined for the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            StreamWriter/Readers.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.stream = stream
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.reader = Reader(stream, errors)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.writer = Writer(stream, errors)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.errors = errors
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-05-15 17:19:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def read(self, size=-1):
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return self.reader.read(size)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-05-01 16:17:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def readline(self, size=None):
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:37:43 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return self.reader.readline(size)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-05-01 16:17:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def readlines(self, sizehint=None):
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:37:43 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return self.reader.readlines(sizehint)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2002-11-06 16:53:44 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def next(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """ Return the next decoded line from the input stream."""
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return self.reader.next()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def __iter__(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return self
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-05-15 17:19:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def write(self, data):
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return self.writer.write(data)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-05-15 17:19:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def writelines(self, list):
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:37:43 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return self.writer.writelines(list)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def reset(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.reader.reset()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.writer.reset()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-05-15 17:19:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def __getattr__(self, name,
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    getattr=getattr):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """ Inherit all other methods from the underlying stream.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							
								
									
										
										
										
											2001-05-15 17:19:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return getattr(self.stream, name)
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2006-10-29 08:39:22 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    # these are needed to make "with codecs.open(...)" work properly
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def __enter__(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return self
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def __exit__(self, type, value, tb):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.stream.close()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								###
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								class StreamRecoder:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-13 14:11:21 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    """ StreamRecoder instances provide a frontend - backend
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        view of encoding data.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        They use the complete set of APIs returned by the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        codecs.lookup() function to implement their task.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        Data written to the stream is first decoded into an
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        intermediate format (which is dependent on the given codec
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        combination) and then written to the stream using an instance
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        of the provided Writer class.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        In the other direction, data is read from the stream using a
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        Reader instance and then return encoded data to the caller.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:37:43 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    # Optional attributes set by the file wrappers below
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    data_encoding = 'unknown'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    file_encoding = 'unknown'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-05-15 17:19:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def __init__(self, stream, encode, decode, Reader, Writer,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                 errors='strict'):
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """ Creates a StreamRecoder instance which implements a two-way
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            conversion: encode and decode work on the frontend (the
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:41:38 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            input to .read() and output of .write()) while
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            Reader and Writer work on the backend (reading and
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-17 15:42:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            writing to the stream).
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            You can use these objects to do transparent direct
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            recodings from e.g. latin-1 to utf-8 and back.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            stream must be a file-like object.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            encode, decode must adhere to the Codec interface, Reader,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            Writer must be factory functions or classes providing the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            StreamReader, StreamWriter interface resp.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            encode and decode are needed for the frontend translation,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            Reader and Writer for the backend translation. Unicode is
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            used as intermediate encoding.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            Error handling is done in the same way as defined for the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            StreamWriter/Readers.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.stream = stream
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.encode = encode
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.decode = decode
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.reader = Reader(stream, errors)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.writer = Writer(stream, errors)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.errors = errors
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-05-15 17:19:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def read(self, size=-1):
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        data = self.reader.read(size)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        data, bytesencoded = self.encode(data, self.errors)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return data
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-05-15 17:19:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def readline(self, size=None):
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:37:43 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if size is None:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            data = self.reader.readline()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        else:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            data = self.reader.readline(size)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        data, bytesencoded = self.encode(data, self.errors)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return data
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-05-15 17:19:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def readlines(self, sizehint=None):
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:37:43 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2004-02-26 15:22:17 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        data = self.reader.read()
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:37:43 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        data, bytesencoded = self.encode(data, self.errors)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return data.splitlines(1)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2002-11-06 16:53:44 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def next(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """ Return the next decoded line from the input stream."""
							 | 
						
					
						
							
								
									
										
										
										
											2005-09-01 11:56:53 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        data = self.reader.next()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        data, bytesencoded = self.encode(data, self.errors)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return data
							 | 
						
					
						
							
								
									
										
										
										
											2002-11-06 16:53:44 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def __iter__(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return self
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-05-15 17:19:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def write(self, data):
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        data, bytesdecoded = self.decode(data, self.errors)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return self.writer.write(data)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-05-15 17:19:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def writelines(self, list):
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:37:43 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        data = ''.join(list)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        data, bytesdecoded = self.decode(data, self.errors)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return self.writer.write(data)
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def reset(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.reader.reset()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.writer.reset()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-05-15 17:19:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def __getattr__(self, name,
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    getattr=getattr):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """ Inherit all other methods from the underlying stream.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							
								
									
										
										
										
											2001-05-15 17:19:16 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return getattr(self.stream, name)
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2006-10-29 08:39:22 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def __enter__(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return self
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def __exit__(self, type, value, tb):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.stream.close()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								### Shortcuts
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-21 21:21:04 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def open(filename, mode='rb', encoding=None, errors='strict', buffering=1):
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """ Open an encoded file using the given mode and return
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        a wrapped version providing transparent encoding/decoding.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        Note: The wrapped version will only accept the object format
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        defined by the codecs, i.e. Unicode objects for most builtin
							 | 
						
					
						
							
								
									
										
										
										
											2005-03-16 03:51:56 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        codecs. Output is also codec dependent and will usually be
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        Unicode as well.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-21 21:21:04 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        Files are always opened in binary mode, even if no binary mode
							 | 
						
					
						
							
								
									
										
										
										
											2003-02-02 23:08:27 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        was specified. This is done to avoid data loss due to encodings
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-21 21:21:04 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        using 8-bit values. The default file mode is 'rb' meaning to
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        open the file in binary read mode.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        encoding specifies the encoding which is to be used for the
							 | 
						
					
						
							
								
									
										
										
										
											2003-02-02 23:08:27 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        file.
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        errors may be given to define the error handling. It defaults
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        to 'strict' which causes ValueErrors to be raised in case an
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        encoding error occurs.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        buffering has the same meaning as for the builtin open() API.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        It defaults to line buffered.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-13 14:11:21 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        The returned wrapped file object provides an extra attribute
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        .encoding which allows querying the used encoding. This
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        attribute is only available if an encoding was specified as
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        parameter.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if encoding is not None and \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								       'b' not in mode:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # Force opening of the file in binary mode
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        mode = mode + 'b'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    file = __builtin__.open(filename, mode, buffering)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if encoding is None:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return file
							 | 
						
					
						
							
								
									
										
										
										
											2006-03-15 11:35:15 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    info = lookup(encoding)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    srw = StreamReaderWriter(file, info.streamreader, info.streamwriter, errors)
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:37:43 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    # Add attributes to simplify introspection
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    srw.encoding = encoding
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return srw
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:37:43 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'):
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """ Return a wrapped version of file which provides transparent
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        encoding translation.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        Strings written to the wrapped file are interpreted according
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:37:43 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        to the given data_encoding and then written to the original
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        file as string using file_encoding. The intermediate encoding
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        will usually be Unicode but depends on the specified codecs.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        Strings are read from the file using file_encoding and then
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        passed back to the caller as string using data_encoding.
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:37:43 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        If file_encoding is not given, it defaults to data_encoding.
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        errors may be given to define the error handling. It defaults
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        to 'strict' which causes ValueErrors to be raised in case an
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        encoding error occurs.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-13 14:11:21 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        The returned wrapped file object provides two extra attributes
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        .data_encoding and .file_encoding which reflect the given
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        parameters of the same name. The attributes can be used for
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        introspection by Python programs.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:37:43 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if file_encoding is None:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        file_encoding = data_encoding
							 | 
						
					
						
							
								
									
										
										
										
											2006-10-29 08:39:22 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    data_info = lookup(data_encoding)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    file_info = lookup(file_encoding)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    sr = StreamRecoder(file, data_info.encode, data_info.decode,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                       file_info.streamreader, file_info.streamwriter, errors)
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:37:43 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    # Add attributes to simplify introspection
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    sr.data_encoding = data_encoding
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    sr.file_encoding = file_encoding
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return sr
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-09-19 11:24:48 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								### Helpers for codec lookup
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								def getencoder(encoding):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """ Lookup up the codec for the given encoding and return
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        its encoder function.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        Raises a LookupError in case the encoding cannot be found.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							
								
									
										
										
										
											2006-03-15 11:35:15 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    return lookup(encoding).encode
							 | 
						
					
						
							
								
									
										
										
										
											2001-09-19 11:24:48 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								def getdecoder(encoding):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """ Lookup up the codec for the given encoding and return
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        its decoder function.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        Raises a LookupError in case the encoding cannot be found.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							
								
									
										
										
										
											2006-03-15 11:35:15 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    return lookup(encoding).decode
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								def getincrementalencoder(encoding):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """ Lookup up the codec for the given encoding and return
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        its IncrementalEncoder class or factory function.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        Raises a LookupError in case the encoding cannot be found
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        or the codecs doesn't provide an incremental encoder.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    encoder = lookup(encoding).incrementalencoder
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if encoder is None:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        raise LookupError(encoding)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return encoder
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								def getincrementaldecoder(encoding):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """ Lookup up the codec for the given encoding and return
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        its IncrementalDecoder class or factory function.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        Raises a LookupError in case the encoding cannot be found
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        or the codecs doesn't provide an incremental decoder.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    decoder = lookup(encoding).incrementaldecoder
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if decoder is None:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        raise LookupError(encoding)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return decoder
							 | 
						
					
						
							
								
									
										
										
										
											2001-09-19 11:24:48 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								def getreader(encoding):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """ Lookup up the codec for the given encoding and return
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        its StreamReader class or factory function.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        Raises a LookupError in case the encoding cannot be found.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							
								
									
										
										
										
											2006-03-15 11:35:15 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    return lookup(encoding).streamreader
							 | 
						
					
						
							
								
									
										
										
										
											2001-09-19 11:24:48 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								def getwriter(encoding):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """ Lookup up the codec for the given encoding and return
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        its StreamWriter class or factory function.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        Raises a LookupError in case the encoding cannot be found.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							
								
									
										
										
										
											2006-03-15 11:35:15 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    return lookup(encoding).streamwriter
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								def iterencode(iterator, encoding, errors='strict', **kwargs):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    Encoding iterator.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    Encodes the input strings from the iterator using a IncrementalEncoder.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    errors and kwargs are passed through to the IncrementalEncoder
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    constructor.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    encoder = getincrementalencoder(encoding)(errors, **kwargs)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    for input in iterator:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        output = encoder.encode(input)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if output:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            yield output
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    output = encoder.encode("", True)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if output:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        yield output
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								def iterdecode(iterator, encoding, errors='strict', **kwargs):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    Decoding iterator.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    Decodes the input strings from the iterator using a IncrementalDecoder.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    errors and kwargs are passed through to the IncrementalDecoder
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    constructor.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    decoder = getincrementaldecoder(encoding)(errors, **kwargs)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    for input in iterator:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        output = decoder.decode(input)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if output:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            yield output
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    output = decoder.decode("", True)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if output:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        yield output
							 | 
						
					
						
							
								
									
										
										
										
											2001-09-19 11:24:48 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-03 21:29:14 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								### Helpers for charmap-based codecs
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								def make_identity_dict(rng):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """ make_identity_dict(rng) -> dict
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        Return a dictionary where elements of the rng sequence are
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        mapped to themselves.
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-14 23:36:06 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-03 21:29:14 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    res = {}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    for i in rng:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        res[i]=i
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return res
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-05-16 09:41:45 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								def make_encoding_map(decoding_map):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """ Creates an encoding map from a decoding map.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2003-02-02 23:08:27 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        If a target mapping in the decoding map occurs multiple
							 | 
						
					
						
							
								
									
										
										
										
											2001-05-16 09:41:45 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        times, then that target is mapped to None (undefined mapping),
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        causing an exception when encountered by the charmap codec
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        during translation.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        One example where this happens is cp875.py which decodes
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        multiple character to \u001a.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    """
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    m = {}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    for k,v in decoding_map.items():
							 | 
						
					
						
							
								
									
										
										
										
											2002-06-01 14:18:47 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        if not v in m:
							 | 
						
					
						
							
								
									
										
										
										
											2001-05-16 09:41:45 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            m[v] = k
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        else:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            m[v] = None
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return m
							 | 
						
					
						
							
								
									
										
										
										
											2001-05-29 06:06:54 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								### error handlers
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2005-03-08 15:03:08 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								try:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    strict_errors = lookup_error("strict")
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    ignore_errors = lookup_error("ignore")
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    replace_errors = lookup_error("replace")
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    xmlcharrefreplace_errors = lookup_error("xmlcharrefreplace")
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    backslashreplace_errors = lookup_error("backslashreplace")
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								except LookupError:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # In --disable-unicode builds, these error handler are missing
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    strict_errors = None
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    ignore_errors = None
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    replace_errors = None
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    xmlcharrefreplace_errors = None
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    backslashreplace_errors = None
							 | 
						
					
						
							
								
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-07-31 08:54:55 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								# Tell modulefinder that using codecs probably needs the encodings
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# package
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								_false = 0
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								if _false:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    import encodings
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								### Tests
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:41:38 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-03-10 23:20:43 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								if __name__ == '__main__':
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:37:43 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    # Make stdout translate Latin-1 output into UTF-8 output
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    sys.stdout = EncodedFile(sys.stdout, 'latin-1', 'utf-8')
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:41:38 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-04-11 15:37:43 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    # Have stdin translate Latin-1 input into UTF-8 input
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    sys.stdin = EncodedFile(sys.stdin, 'utf-8', 'latin-1')
							 |