mirror of
				https://github.com/python/cpython.git
				synced 2025-10-27 19:54:38 +00:00 
			
		
		
		
	 511e2cacc4
			
		
	
	
		511e2cacc4
		
	
	
	
	
		
			
			Mostly by Toby Dickenson and Titus Brown. Add an optional argument to a decompression object's decompress() method. The argument specifies the maximum length of the return value. If the uncompressed data exceeds this length, the excess data is stored as the unconsumed_tail attribute. (Not to be confused with unused_data, which is a separate issue.) Difference from SF patch: Default value for unconsumed_tail is "" rather than None. It's simpler if the attribute is always a string.
		
			
				
	
	
		
			171 lines
		
	
	
	
		
			7.8 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
			
		
		
	
	
			171 lines
		
	
	
	
		
			7.8 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
| \section{\module{zlib} ---
 | |
|          Compression compatible with \program{gzip}}
 | |
| 
 | |
| \declaremodule{builtin}{zlib}
 | |
| \modulesynopsis{Low-level interface to compression and decompression
 | |
|                 routines compatible with \program{gzip}.}
 | |
| 
 | |
| 
 | |
| For applications that require data compression, the functions in this
 | |
| module allow compression and decompression, using the zlib library.
 | |
| The zlib library has its own home page at
 | |
| \url{http://www.gzip.org/zlib/}.  Version 1.1.3 is the
 | |
| most recent version as of September 2000; use a later version if one
 | |
| is available.  There are known incompatibilities between the Python
 | |
| module and earlier versions of the zlib library.
 | |
| 
 | |
| The available exception and functions in this module are:
 | |
| 
 | |
| \begin{excdesc}{error}
 | |
|   Exception raised on compression and decompression errors.
 | |
| \end{excdesc}
 | |
| 
 | |
| 
 | |
| \begin{funcdesc}{adler32}{string\optional{, value}}
 | |
|    Computes an Adler-32 checksum of \var{string}.  (An Adler-32
 | |
|    checksum is almost as reliable as a CRC32 but can be computed much
 | |
|    more quickly.)  If \var{value} is present, it is used as the
 | |
|    starting value of the checksum; otherwise, a fixed default value is
 | |
|    used.  This allows computing a running checksum over the
 | |
|    concatenation of several input strings.  The algorithm is not
 | |
|    cryptographically strong, and should not be used for
 | |
|    authentication or digital signatures.  Since the algorithm is
 | |
|    designed for use as a checksum algorithm, it is not suitable for
 | |
|    use as a general hash algorithm.
 | |
| \end{funcdesc}
 | |
| 
 | |
| \begin{funcdesc}{compress}{string\optional{, level}}
 | |
|   Compresses the data in \var{string}, returning a string containing
 | |
|   compressed data.  \var{level} is an integer from \code{1} to
 | |
|   \code{9} controlling the level of compression; \code{1} is fastest
 | |
|   and produces the least compression, \code{9} is slowest and produces
 | |
|   the most.  The default value is \code{6}.  Raises the
 | |
|   \exception{error} exception if any error occurs.
 | |
| \end{funcdesc}
 | |
| 
 | |
| \begin{funcdesc}{compressobj}{\optional{level}}
 | |
|   Returns a compression object, to be used for compressing data streams
 | |
|   that won't fit into memory at once.  \var{level} is an integer from
 | |
|   \code{1} to \code{9} controlling the level of compression; \code{1} is
 | |
|   fastest and produces the least compression, \code{9} is slowest and
 | |
|   produces the most.  The default value is \code{6}.
 | |
| \end{funcdesc}
 | |
| 
 | |
| \begin{funcdesc}{crc32}{string\optional{, value}}
 | |
|   Computes a CRC (Cyclic Redundancy Check)%
 | |
|   \index{Cyclic Redundancy Check}
 | |
|   \index{checksum!Cyclic Redundancy Check}
 | |
|   checksum of \var{string}. If
 | |
|   \var{value} is present, it is used as the starting value of the
 | |
|   checksum; otherwise, a fixed default value is used.  This allows
 | |
|   computing a running checksum over the concatenation of several
 | |
|   input strings.  The algorithm is not cryptographically strong, and
 | |
|   should not be used for authentication or digital signatures.  Since
 | |
|   the algorithm is designed for use as a checksum algorithm, it is not
 | |
|   suitable for use as a general hash algorithm.
 | |
| \end{funcdesc}
 | |
| 
 | |
| \begin{funcdesc}{decompress}{string\optional{, wbits\optional{, bufsize}}}
 | |
|   Decompresses the data in \var{string}, returning a string containing
 | |
|   the uncompressed data.  The \var{wbits} parameter controls the size of
 | |
|   the window buffer.  If \var{bufsize} is given, it is used as the
 | |
|   initial size of the output buffer.  Raises the \exception{error}
 | |
|   exception if any error occurs.
 | |
| 
 | |
| The absolute value of \var{wbits} is the base two logarithm of the
 | |
| size of the history buffer (the ``window size'') used when compressing
 | |
| data.  Its absolute value should be between 8 and 15 for the most
 | |
| recent versions of the zlib library, larger values resulting in better
 | |
| compression at the expense of greater memory usage.  The default value
 | |
| is 15.  When \var{wbits} is negative, the standard
 | |
| \program{gzip} header is suppressed; this is an undocumented feature
 | |
| of the zlib library, used for compatibility with \program{unzip}'s
 | |
| compression file format.
 | |
| 
 | |
| \var{bufsize} is the initial size of the buffer used to hold
 | |
| decompressed data.  If more space is required, the buffer size will be
 | |
| increased as needed, so you don't have to get this value exactly
 | |
| right; tuning it will only save a few calls to \cfunction{malloc()}.  The
 | |
| default size is 16384.
 | |
|    
 | |
| \end{funcdesc}
 | |
| 
 | |
| \begin{funcdesc}{decompressobj}{\optional{wbits}}
 | |
|   Returns a decompression object, to be used for decompressing data
 | |
|   streams that won't fit into memory at once.  The \var{wbits}
 | |
|   parameter controls the size of the window buffer.
 | |
| \end{funcdesc}
 | |
| 
 | |
| Compression objects support the following methods:
 | |
| 
 | |
| \begin{methoddesc}[Compress]{compress}{string}
 | |
| Compress \var{string}, returning a string containing compressed data
 | |
| for at least part of the data in \var{string}.  This data should be
 | |
| concatenated to the output produced by any preceding calls to the
 | |
| \method{compress()} method.  Some input may be kept in internal buffers
 | |
| for later processing.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}[Compress]{flush}{\optional{mode}}
 | |
| All pending input is processed, and a string containing the remaining
 | |
| compressed output is returned.  \var{mode} can be selected from the
 | |
| constants \constant{Z_SYNC_FLUSH},  \constant{Z_FULL_FLUSH},  or 
 | |
| \constant{Z_FINISH}, defaulting to \constant{Z_FINISH}.  \constant{Z_SYNC_FLUSH} and 
 | |
| \constant{Z_FULL_FLUSH} allow compressing further strings of data and
 | |
| are used to allow partial error recovery on decompression, while
 | |
| \constant{Z_FINISH} finishes the compressed stream and 
 | |
| prevents compressing any more data.  After calling
 | |
| \method{flush()} with \var{mode} set to \constant{Z_FINISH}, the
 | |
| \method{compress()} method cannot be called again; the only realistic
 | |
| action is to delete the object.  
 | |
| \end{methoddesc}
 | |
| 
 | |
| Decompression objects support the following methods, and two attributes:
 | |
| 
 | |
| \begin{memberdesc}{unused_data}
 | |
| A string which contains any unused data from the last string fed to
 | |
| this decompression object.  If the whole string turned out to contain
 | |
| compressed data, this is \code{""}, the empty string. 
 | |
| 
 | |
| The only way to determine where a string of compressed data ends is by
 | |
| actually decompressing it.  This means that when compressed data is
 | |
| contained as part of a larger file, you can only find the end of it by
 | |
| reading data and feeding it into a decompression object's
 | |
| \method{decompress()} method until the \member{unused_data} attribute is
 | |
| no longer the empty string.  
 | |
| \end{memberdesc}
 | |
| 
 | |
| \begin{memberdesc}{unconsumed_tail}
 | |
| A string that contains any data that was not consumed by the last
 | |
| \method{decompress()} call because it exceeded the limit for the
 | |
| uncompressed data buffer.
 | |
| \end{memberdesc}
 | |
| 
 | |
| \begin{methoddesc}[Decompress]{decompress}{string\optional{, max_length}}
 | |
| Decompress \var{string}, returning a string containing the
 | |
| uncompressed data corresponding to at least part of the data in
 | |
| \var{string}.  This data should be concatenated to the output produced
 | |
| by any preceding calls to the
 | |
| \method{decompress()} method.  Some of the input data may be preserved
 | |
| in internal buffers for later processing.
 | |
| 
 | |
| If the optional parameter \var{max_length} is supplied then the return value
 | |
| will be no longer than \var{max_length}. This may mean that not all of the
 | |
| compressed input can be processed, and any unconsumed data will be stored
 | |
| in the attribute \member{unconsumed_tail}. This string must be passed
 | |
| to a subsequent call to \method{decompress()} if decompression is to
 | |
| continue.  If \var{max_length} is not supplied then the whole input is
 | |
| decompressed, and \member{unconsumed_tail} is an empty string.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}[Decompress]{flush}{}
 | |
| All pending input is processed, and a string containing the remaining
 | |
| uncompressed output is returned.  After calling \method{flush()}, the
 | |
| \method{decompress()} method cannot be called again; the only realistic
 | |
| action is to delete the object.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{seealso}
 | |
|   \seemodule{gzip}{Reading and writing \program{gzip}-format files.}
 | |
|   \seeurl{http://www.gzip.org/zlib/}{The zlib library home page.}
 | |
| \end{seealso}
 |