| 
									
										
										
										
											1998-08-10 19:42:37 +00:00
										 |  |  | \section{\module{struct} --- | 
					
						
							| 
									
										
										
										
											1999-08-24 20:16:29 +00:00
										 |  |  |          Interpret strings as packed binary data} | 
					
						
							| 
									
										
										
										
											1998-07-23 17:59:49 +00:00
										 |  |  | \declaremodule{builtin}{struct} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | \modulesynopsis{Interpret strings as packed binary data.} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-08-24 20:16:29 +00:00
										 |  |  | \indexii{C}{structures} | 
					
						
							|  |  |  | \indexiii{packing}{binary}{data} | 
					
						
							| 
									
										
										
										
											1994-01-02 01:22:07 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-08-24 20:16:29 +00:00
										 |  |  | This module performs conversions between Python values and C | 
					
						
							| 
									
										
										
										
											1994-01-02 01:22:07 +00:00
										 |  |  | structs represented as Python strings.  It uses \dfn{format strings} | 
					
						
							| 
									
										
										
										
											1999-08-24 20:16:29 +00:00
										 |  |  | (explained below) as compact descriptions of the layout of the C | 
					
						
							|  |  |  | structs and the intended conversion to/from Python values.  This can | 
					
						
							|  |  |  | be used in handling binary data stored in files or from network | 
					
						
							|  |  |  | connections, among other sources. | 
					
						
							| 
									
										
										
										
											1994-01-02 01:22:07 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | The module defines the following exception and functions: | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-03-08 07:44:13 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1994-01-02 01:22:07 +00:00
										 |  |  | \begin{excdesc}{error} | 
					
						
							|  |  |  |   Exception raised on various occasions; argument is a string | 
					
						
							|  |  |  |   describing what is wrong. | 
					
						
							|  |  |  | \end{excdesc} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-11-30 22:14:58 +00:00
										 |  |  | \begin{funcdesc}{pack}{fmt, v1, v2, \textrm{\ldots}} | 
					
						
							| 
									
										
										
										
											1994-01-02 01:22:07 +00:00
										 |  |  |   Return a string containing the values | 
					
						
							| 
									
										
										
										
											1998-11-30 22:14:58 +00:00
										 |  |  |   \code{\var{v1}, \var{v2}, \textrm{\ldots}} packed according to the given | 
					
						
							| 
									
										
										
										
											1994-01-02 01:22:07 +00:00
										 |  |  |   format.  The arguments must match the values required by the format | 
					
						
							|  |  |  |   exactly. | 
					
						
							|  |  |  | \end{funcdesc} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-03-17 06:33:25 +00:00
										 |  |  | \begin{funcdesc}{unpack}{fmt, string} | 
					
						
							| 
									
										
										
										
											1998-11-30 22:14:58 +00:00
										 |  |  |   Unpack the string (presumably packed by \code{pack(\var{fmt}, | 
					
						
							|  |  |  |   \textrm{\ldots})}) according to the given format.  The result is a | 
					
						
							|  |  |  |   tuple even if it contains exactly one item.  The string must contain | 
					
						
							| 
									
										
										
										
											2001-07-06 20:30:11 +00:00
										 |  |  |   exactly the amount of data required by the format | 
					
						
							|  |  |  |   (\code{len(\var{string})} must equal \code{calcsize(\var{fmt})}). | 
					
						
							| 
									
										
										
										
											1994-01-02 01:22:07 +00:00
										 |  |  | \end{funcdesc} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | \begin{funcdesc}{calcsize}{fmt} | 
					
						
							|  |  |  |   Return the size of the struct (and hence of the string) | 
					
						
							|  |  |  |   corresponding to the given format. | 
					
						
							|  |  |  | \end{funcdesc} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-11-30 22:14:58 +00:00
										 |  |  | Format characters have the following meaning; the conversion between | 
					
						
							| 
									
										
										
										
											1999-08-24 20:16:29 +00:00
										 |  |  | C and Python values should be obvious given their types: | 
					
						
							| 
									
										
										
										
											1994-01-02 01:22:07 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2000-04-03 20:13:55 +00:00
										 |  |  | \begin{tableiv}{c|l|l|c}{samp}{Format}{C Type}{Python}{Notes} | 
					
						
							|  |  |  |   \lineiv{x}{pad byte}{no value}{} | 
					
						
							|  |  |  |   \lineiv{c}{\ctype{char}}{string of length 1}{} | 
					
						
							|  |  |  |   \lineiv{b}{\ctype{signed char}}{integer}{} | 
					
						
							|  |  |  |   \lineiv{B}{\ctype{unsigned char}}{integer}{} | 
					
						
							|  |  |  |   \lineiv{h}{\ctype{short}}{integer}{} | 
					
						
							|  |  |  |   \lineiv{H}{\ctype{unsigned short}}{integer}{} | 
					
						
							|  |  |  |   \lineiv{i}{\ctype{int}}{integer}{} | 
					
						
							| 
									
										
										
										
											2001-06-10 23:40:19 +00:00
										 |  |  |   \lineiv{I}{\ctype{unsigned int}}{long}{} | 
					
						
							| 
									
										
										
										
											2000-04-03 20:13:55 +00:00
										 |  |  |   \lineiv{l}{\ctype{long}}{integer}{} | 
					
						
							|  |  |  |   \lineiv{L}{\ctype{unsigned long}}{long}{} | 
					
						
							| 
									
										
										
										
											2001-06-10 23:40:19 +00:00
										 |  |  |   \lineiv{q}{\ctype{long long}}{long}{(1)} | 
					
						
							|  |  |  |   \lineiv{Q}{\ctype{unsigned long long}}{long}{(1)} | 
					
						
							| 
									
										
										
										
											2000-04-03 20:13:55 +00:00
										 |  |  |   \lineiv{f}{\ctype{float}}{float}{} | 
					
						
							|  |  |  |   \lineiv{d}{\ctype{double}}{float}{} | 
					
						
							|  |  |  |   \lineiv{s}{\ctype{char[]}}{string}{} | 
					
						
							|  |  |  |   \lineiv{p}{\ctype{char[]}}{string}{} | 
					
						
							|  |  |  |   \lineiv{P}{\ctype{void *}}{integer}{} | 
					
						
							|  |  |  | \end{tableiv} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | \noindent | 
					
						
							|  |  |  | Notes: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | \begin{description} | 
					
						
							|  |  |  | \item[(1)] | 
					
						
							| 
									
										
										
										
											2001-06-10 23:40:19 +00:00
										 |  |  |   The \character{q} and \character{Q} conversion codes are available in | 
					
						
							|  |  |  |   native mode only if the platform C compiler supports C \ctype{long long}, | 
					
						
							| 
									
										
										
										
											2001-06-15 14:13:07 +00:00
										 |  |  |   or, on Windows, \ctype{__int64}.  They are always available in standard | 
					
						
							| 
									
										
										
										
											2001-06-12 01:22:22 +00:00
										 |  |  |   modes. | 
					
						
							| 
									
										
										
										
											2001-06-15 14:13:07 +00:00
										 |  |  |   \versionadded{2.2} | 
					
						
							| 
									
										
										
										
											2000-04-03 20:13:55 +00:00
										 |  |  | \end{description} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1994-01-02 01:22:07 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-07-06 20:30:11 +00:00
										 |  |  | A format character may be preceded by an integral repeat count.  For | 
					
						
							|  |  |  | example, the format string \code{'4h'} means exactly the same as | 
					
						
							| 
									
										
										
										
											1998-11-30 22:14:58 +00:00
										 |  |  | \code{'hhhh'}. | 
					
						
							| 
									
										
										
										
											1994-01-02 01:22:07 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1997-08-26 20:39:54 +00:00
										 |  |  | Whitespace characters between formats are ignored; a count and its | 
					
						
							|  |  |  | format must not be separated by whitespace, though. | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-07-23 21:18:25 +00:00
										 |  |  | For the \character{s} format character, the count is interpreted as the | 
					
						
							| 
									
										
										
										
											1996-12-31 02:22:14 +00:00
										 |  |  | size of the string, not a repeat count as it is for the other format | 
					
						
							| 
									
										
										
										
											2001-07-06 20:30:11 +00:00
										 |  |  | characters; for example, \code{'10s'} means a single 10-byte string, while | 
					
						
							| 
									
										
										
										
											1996-12-31 02:22:14 +00:00
										 |  |  | \code{'10c'} means 10 characters.  For packing, the string is | 
					
						
							|  |  |  | truncated or padded with null bytes as appropriate to make it fit. | 
					
						
							|  |  |  | For unpacking, the resulting string always has exactly the specified | 
					
						
							|  |  |  | number of bytes.  As a special case, \code{'0s'} means a single, empty | 
					
						
							|  |  |  | string (while \code{'0c'} means 0 characters). | 
					
						
							| 
									
										
										
										
											1994-01-02 01:22:07 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-09-15 18:09:22 +00:00
										 |  |  | The \character{p} format character encodes a ``Pascal string'', meaning | 
					
						
							|  |  |  | a short variable-length string stored in a fixed number of bytes. | 
					
						
							|  |  |  | The count is the total number of bytes stored.  The first byte stored is | 
					
						
							|  |  |  | the length of the string, or 255, whichever is smaller.  The bytes | 
					
						
							|  |  |  | of the string follow.  If the string passed in to \function{pack()} is too | 
					
						
							|  |  |  | long (longer than the count minus 1), only the leading count-1 bytes of the | 
					
						
							| 
									
										
										
										
											2001-09-15 18:16:27 +00:00
										 |  |  | string are stored.  If the string is shorter than count-1, it is padded | 
					
						
							| 
									
										
										
										
											2001-09-15 18:09:22 +00:00
										 |  |  | with null bytes so that exactly count bytes in all are used.  Note that | 
					
						
							|  |  |  | for \function{unpack()}, the \character{p} format character consumes count | 
					
						
							|  |  |  | bytes, but that the string returned can never contain more than 255 | 
					
						
							|  |  |  | characters. | 
					
						
							| 
									
										
										
										
											1998-07-23 21:18:25 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-06-12 01:22:22 +00:00
										 |  |  | For the \character{I}, \character{L}, \character{q} and \character{Q} | 
					
						
							|  |  |  | format characters, the return value is a Python long integer. | 
					
						
							| 
									
										
										
										
											1996-12-31 02:22:14 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-09-21 14:44:34 +00:00
										 |  |  | For the \character{P} format character, the return value is a Python | 
					
						
							|  |  |  | integer or long integer, depending on the size needed to hold a | 
					
						
							| 
									
										
										
										
											1998-11-30 22:14:58 +00:00
										 |  |  | pointer when it has been cast to an integer type.  A \NULL{} pointer will | 
					
						
							|  |  |  | always be returned as the Python integer \code{0}. When packing pointer-sized | 
					
						
							| 
									
										
										
										
											1998-09-21 14:44:34 +00:00
										 |  |  | values, Python integer or long integer objects may be used.  For | 
					
						
							|  |  |  | example, the Alpha and Merced processors use 64-bit pointer values, | 
					
						
							|  |  |  | meaning a Python long integer will be used to hold the pointer; other | 
					
						
							|  |  |  | platforms use 32-bit pointers and will use a Python integer. | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-08-24 20:16:29 +00:00
										 |  |  | By default, C numbers are represented in the machine's native format | 
					
						
							| 
									
										
										
										
											1996-12-31 02:22:14 +00:00
										 |  |  | and byte order, and properly aligned by skipping pad bytes if | 
					
						
							| 
									
										
										
										
											1999-08-24 20:16:29 +00:00
										 |  |  | necessary (according to the rules used by the C compiler). | 
					
						
							| 
									
										
										
										
											1996-12-31 02:22:14 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | Alternatively, the first character of the format string can be used to | 
					
						
							|  |  |  | indicate the byte order, size and alignment of the packed data, | 
					
						
							|  |  |  | according to the following table: | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-04-11 20:53:03 +00:00
										 |  |  | \begin{tableiii}{c|l|l}{samp}{Character}{Byte order}{Size and alignment} | 
					
						
							| 
									
										
										
										
											1996-12-31 02:22:14 +00:00
										 |  |  |   \lineiii{@}{native}{native} | 
					
						
							|  |  |  |   \lineiii{=}{native}{standard} | 
					
						
							|  |  |  |   \lineiii{<}{little-endian}{standard} | 
					
						
							|  |  |  |   \lineiii{>}{big-endian}{standard} | 
					
						
							|  |  |  |   \lineiii{!}{network (= big-endian)}{standard} | 
					
						
							|  |  |  | \end{tableiii} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-07-23 21:18:25 +00:00
										 |  |  | If the first character is not one of these, \character{@} is assumed. | 
					
						
							| 
									
										
										
										
											1996-12-31 02:22:14 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | Native byte order is big-endian or little-endian, depending on the | 
					
						
							| 
									
										
										
										
											2001-07-06 20:30:11 +00:00
										 |  |  | host system.  For example, Motorola and Sun processors are big-endian; | 
					
						
							|  |  |  | Intel and DEC processors are little-endian. | 
					
						
							| 
									
										
										
										
											1996-12-31 02:22:14 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1999-08-24 20:16:29 +00:00
										 |  |  | Native size and alignment are determined using the C compiler's | 
					
						
							| 
									
										
										
										
											1998-11-30 22:14:58 +00:00
										 |  |  | \keyword{sizeof} expression.  This is always combined with native byte | 
					
						
							|  |  |  | order. | 
					
						
							| 
									
										
										
										
											1996-12-31 02:22:14 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | Standard size and alignment are as follows: no alignment is required | 
					
						
							| 
									
										
										
										
											2001-06-12 01:22:22 +00:00
										 |  |  | for any type (so you have to use pad bytes); | 
					
						
							|  |  |  | \ctype{short} is 2 bytes; | 
					
						
							|  |  |  | \ctype{int} and \ctype{long} are 4 bytes; | 
					
						
							|  |  |  | \ctype{long long} (\ctype{__int64} on Windows) is 8 bytes; | 
					
						
							|  |  |  | \ctype{float} and \ctype{double} are 32-bit and 64-bit | 
					
						
							|  |  |  | IEEE floating point numbers, respectively. | 
					
						
							| 
									
										
										
										
											1996-12-31 02:22:14 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-11-30 22:14:58 +00:00
										 |  |  | Note the difference between \character{@} and \character{=}: both use | 
					
						
							|  |  |  | native byte order, but the size and alignment of the latter are | 
					
						
							|  |  |  | standardized. | 
					
						
							| 
									
										
										
										
											1996-12-31 02:22:14 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-07-23 21:18:25 +00:00
										 |  |  | The form \character{!} is available for those poor souls who claim they | 
					
						
							| 
									
										
										
										
											1996-12-31 02:22:14 +00:00
										 |  |  | can't remember whether network byte order is big-endian or | 
					
						
							|  |  |  | little-endian. | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-07-06 20:30:11 +00:00
										 |  |  | There is no way to indicate non-native byte order (force | 
					
						
							| 
									
										
										
										
											1998-07-23 21:18:25 +00:00
										 |  |  | byte-swapping); use the appropriate choice of \character{<} or | 
					
						
							|  |  |  | \character{>}. | 
					
						
							| 
									
										
										
										
											1996-12-31 02:22:14 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-09-21 14:44:34 +00:00
										 |  |  | The \character{P} format character is only available for the native | 
					
						
							|  |  |  | byte ordering (selected as the default or with the \character{@} byte | 
					
						
							|  |  |  | order character). The byte order character \character{=} chooses to | 
					
						
							|  |  |  | use little- or big-endian ordering based on the host system. The | 
					
						
							|  |  |  | struct module does not interpret this as native ordering, so the | 
					
						
							|  |  |  | \character{P} format is not available. | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1996-12-31 02:22:14 +00:00
										 |  |  | Examples (all using native byte order, size and alignment, on a | 
					
						
							|  |  |  | big-endian machine): | 
					
						
							| 
									
										
										
										
											1994-01-02 01:22:07 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-02-13 06:58:54 +00:00
										 |  |  | \begin{verbatim} | 
					
						
							| 
									
										
										
										
											1997-01-03 04:20:09 +00:00
										 |  |  | >>> from struct import * | 
					
						
							|  |  |  | >>> pack('hhl', 1, 2, 3) | 
					
						
							| 
									
										
										
										
											2001-01-24 17:19:08 +00:00
										 |  |  | '\x00\x01\x00\x02\x00\x00\x00\x03' | 
					
						
							|  |  |  | >>> unpack('hhl', '\x00\x01\x00\x02\x00\x00\x00\x03') | 
					
						
							| 
									
										
										
										
											1997-01-03 04:20:09 +00:00
										 |  |  | (1, 2, 3) | 
					
						
							|  |  |  | >>> calcsize('hhl') | 
					
						
							|  |  |  | 8 | 
					
						
							| 
									
										
										
										
											1998-02-13 06:58:54 +00:00
										 |  |  | \end{verbatim} | 
					
						
							| 
									
										
										
										
											1998-11-30 22:14:58 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1994-01-02 01:22:07 +00:00
										 |  |  | Hint: to align the end of a structure to the alignment requirement of | 
					
						
							|  |  |  | a particular type, end the format with the code for that type with a | 
					
						
							| 
									
										
										
										
											2001-07-06 20:30:11 +00:00
										 |  |  | repeat count of zero.  For example, the format \code{'llh0l'} | 
					
						
							|  |  |  | specifies two pad bytes at the end, assuming longs are aligned on | 
					
						
							|  |  |  | 4-byte boundaries.  This only works when native size and alignment are | 
					
						
							|  |  |  | in effect; standard size and alignment do not enforce any alignment. | 
					
						
							| 
									
										
										
										
											1998-03-08 07:44:13 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | \begin{seealso} | 
					
						
							| 
									
										
										
										
											1999-08-24 20:16:29 +00:00
										 |  |  |   \seemodule{array}{Packed binary storage of homogeneous data.} | 
					
						
							|  |  |  |   \seemodule{xdrlib}{Packing and unpacking of XDR data.} | 
					
						
							| 
									
										
										
										
											1998-03-08 07:44:13 +00:00
										 |  |  | \end{seealso} |