mirror of
				https://github.com/python/cpython.git
				synced 2025-10-30 21:21:22 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			654 lines
		
	
	
	
		
			25 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
			
		
		
	
	
			654 lines
		
	
	
	
		
			25 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
| \section{\module{xml.parsers.expat} ---
 | |
|          Fast XML parsing using Expat}
 | |
| 
 | |
| % Markup notes:
 | |
| %
 | |
| % Many of the attributes of the XMLParser objects are callbacks.
 | |
| % Since signature information must be presented, these are described
 | |
| % using the methoddesc environment.  Since they are attributes which
 | |
| % are set by client code, in-text references to these attributes
 | |
| % should be marked using the \member macro and should not include the
 | |
| % parentheses used when marking functions and methods.
 | |
| 
 | |
| \declaremodule{standard}{xml.parsers.expat}
 | |
| \modulesynopsis{An interface to the Expat non-validating XML parser.}
 | |
| \moduleauthor{Paul Prescod}{paul@prescod.net}
 | |
| 
 | |
| \versionadded{2.0}
 | |
| 
 | |
| The \module{xml.parsers.expat} module is a Python interface to the
 | |
| Expat\index{Expat} non-validating XML parser.
 | |
| The module provides a single extension type, \class{xmlparser}, that
 | |
| represents the current state of an XML parser.  After an
 | |
| \class{xmlparser} object has been created, various attributes of the object 
 | |
| can be set to handler functions.  When an XML document is then fed to
 | |
| the parser, the handler functions are called for the character data
 | |
| and markup in the XML document.
 | |
| 
 | |
| This module uses the \module{pyexpat}\refbimodindex{pyexpat} module to
 | |
| provide access to the Expat parser.  Direct use of the
 | |
| \module{pyexpat} module is deprecated.
 | |
| 
 | |
| This module provides one exception and one type object:
 | |
| 
 | |
| \begin{excdesc}{ExpatError}
 | |
|   The exception raised when Expat reports an error.  See section
 | |
|   \ref{expaterror-objects}, ``ExpatError Exceptions,'' for more
 | |
|   information on interpreting Expat errors.
 | |
| \end{excdesc}
 | |
| 
 | |
| \begin{excdesc}{error}
 | |
|   Alias for \exception{ExpatError}.
 | |
| \end{excdesc}
 | |
| 
 | |
| \begin{datadesc}{XMLParserType}
 | |
|   The type of the return values from the \function{ParserCreate()}
 | |
|   function.
 | |
| \end{datadesc}
 | |
| 
 | |
| 
 | |
| The \module{xml.parsers.expat} module contains two functions:
 | |
| 
 | |
| \begin{funcdesc}{ErrorString}{errno}
 | |
| Returns an explanatory string for a given error number \var{errno}.
 | |
| \end{funcdesc}
 | |
| 
 | |
| \begin{funcdesc}{ParserCreate}{\optional{encoding\optional{,
 | |
|                                namespace_separator}}}
 | |
| Creates and returns a new \class{xmlparser} object.  
 | |
| \var{encoding}, if specified, must be a string naming the encoding 
 | |
| used by the XML data.  Expat doesn't support as many encodings as
 | |
| Python does, and its repertoire of encodings can't be extended; it
 | |
| supports UTF-8, UTF-16, ISO-8859-1 (Latin1), and ASCII.  If
 | |
| \var{encoding} is given it will override the implicit or explicit
 | |
| encoding of the document.
 | |
| 
 | |
| Expat can optionally do XML namespace processing for you, enabled by
 | |
| providing a value for \var{namespace_separator}.  The value must be a
 | |
| one-character string; a \exception{ValueError} will be raised if the
 | |
| string has an illegal length (\code{None} is considered the same as
 | |
| omission).  When namespace processing is enabled, element type names
 | |
| and attribute names that belong to a namespace will be expanded.  The
 | |
| element name passed to the element handlers
 | |
| \member{StartElementHandler} and \member{EndElementHandler}
 | |
| will be the concatenation of the namespace URI, the namespace
 | |
| separator character, and the local part of the name.  If the namespace
 | |
| separator is a zero byte (\code{chr(0)}) then the namespace URI and
 | |
| the local part will be concatenated without any separator.
 | |
| 
 | |
| For example, if \var{namespace_separator} is set to a space character
 | |
| (\character{ }) and the following document is parsed:
 | |
| 
 | |
| \begin{verbatim}
 | |
| <?xml version="1.0"?>
 | |
| <root xmlns    = "http://default-namespace.org/"
 | |
|       xmlns:py = "http://www.python.org/ns/">
 | |
|   <py:elem1 />
 | |
|   <elem2 xmlns="" />
 | |
| </root>
 | |
| \end{verbatim}
 | |
| 
 | |
| \member{StartElementHandler} will receive the following strings
 | |
| for each element:
 | |
| 
 | |
| \begin{verbatim}
 | |
| http://default-namespace.org/ root
 | |
| http://www.python.org/ns/ elem1
 | |
| elem2
 | |
| \end{verbatim}
 | |
| \end{funcdesc}
 | |
| 
 | |
| 
 | |
| \begin{seealso}
 | |
|   \seetitle[http://www.libexpat.org/]{The Expat XML Parser}
 | |
|            {Home page of the Expat project.}
 | |
| \end{seealso}
 | |
| 
 | |
| 
 | |
| \subsection{XMLParser Objects \label{xmlparser-objects}}
 | |
| 
 | |
| \class{xmlparser} objects have the following methods:
 | |
| 
 | |
| \begin{methoddesc}[xmlparser]{Parse}{data\optional{, isfinal}}
 | |
| Parses the contents of the string \var{data}, calling the appropriate
 | |
| handler functions to process the parsed data.  \var{isfinal} must be
 | |
| true on the final call to this method.  \var{data} can be the empty
 | |
| string at any time.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}[xmlparser]{ParseFile}{file}
 | |
| Parse XML data reading from the object \var{file}.  \var{file} only
 | |
| needs to provide the \method{read(\var{nbytes})} method, returning the
 | |
| empty string when there's no more data.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}[xmlparser]{SetBase}{base}
 | |
| Sets the base to be used for resolving relative URIs in system
 | |
| identifiers in declarations.  Resolving relative identifiers is left
 | |
| to the application: this value will be passed through as the
 | |
| \var{base} argument to the \member{ExternalEntityRefHandler},
 | |
| \member{NotationDeclHandler}, and
 | |
| \member{UnparsedEntityDeclHandler} functions.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}[xmlparser]{GetBase}{}
 | |
| Returns a string containing the base set by a previous call to
 | |
| \method{SetBase()}, or \code{None} if 
 | |
| \method{SetBase()} hasn't been called.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}[xmlparser]{GetInputContext}{}
 | |
| Returns the input data that generated the current event as a string.
 | |
| The data is in the encoding of the entity which contains the text.
 | |
| When called while an event handler is not active, the return value is
 | |
| \code{None}.
 | |
| \versionadded{2.1}
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}[xmlparser]{ExternalEntityParserCreate}{context\optional{,
 | |
|                                                           encoding}}
 | |
| Create a ``child'' parser which can be used to parse an external
 | |
| parsed entity referred to by content parsed by the parent parser.  The
 | |
| \var{context} parameter should be the string passed to the
 | |
| \member{ExternalEntityRefHandler} handler function, described below.
 | |
| The child parser is created with the \member{ordered_attributes},
 | |
| \member{returns_unicode} and \member{specified_attributes} set to the
 | |
| values of this parser.
 | |
| \end{methoddesc}
 | |
| 
 | |
| 
 | |
| \class{xmlparser} objects have the following attributes:
 | |
| 
 | |
| \begin{memberdesc}[xmlparser]{buffer_size}
 | |
| The size of the buffer used when \member{buffer_text} is true.  This
 | |
| value cannot be changed at this time.
 | |
| \versionadded{2.3}
 | |
| \end{memberdesc}
 | |
| 
 | |
| \begin{memberdesc}[xmlparser]{buffer_text}
 | |
| Setting this to true causes the \class{xmlparser} object to buffer
 | |
| textual content returned by Expat to avoid multiple calls to the
 | |
| \member{CharacterDataHandler} callback whenever possible.  This can
 | |
| improve performance substantially since Expat normally breaks
 | |
| character data into chunks at every line ending.  This attribute is
 | |
| false by default, and may be changed at any time.
 | |
| \versionadded{2.3}
 | |
| \end{memberdesc}
 | |
| 
 | |
| \begin{memberdesc}[xmlparser]{buffer_used}
 | |
| If \member{buffer_text} is enabled, the number of bytes stored in the
 | |
| buffer.  These bytes represent UTF-8 encoded text.  This attribute has
 | |
| no meaningful interpretation when \member{buffer_text} is false.
 | |
| \versionadded{2.3}
 | |
| \end{memberdesc}
 | |
| 
 | |
| \begin{memberdesc}[xmlparser]{ordered_attributes}
 | |
| Setting this attribute to a non-zero integer causes the attributes to
 | |
| be reported as a list rather than a dictionary.  The attributes are
 | |
| presented in the order found in the document text.  For each
 | |
| attribute, two list entries are presented: the attribute name and the
 | |
| attribute value.  (Older versions of this module also used this
 | |
| format.)  By default, this attribute is false; it may be changed at
 | |
| any time.
 | |
| \versionadded{2.1}
 | |
| \end{memberdesc}
 | |
| 
 | |
| \begin{memberdesc}[xmlparser]{returns_unicode} 
 | |
| If this attribute is set to a non-zero integer, the handler functions
 | |
| will be passed Unicode strings.  If \member{returns_unicode} is 0,
 | |
| 8-bit strings containing UTF-8 encoded data will be passed to the
 | |
| handlers.
 | |
| \versionchanged[Can be changed at any time to affect the result
 | |
|   type]{1.6}
 | |
| \end{memberdesc}
 | |
| 
 | |
| \begin{memberdesc}[xmlparser]{specified_attributes}
 | |
| If set to a non-zero integer, the parser will report only those
 | |
| attributes which were specified in the document instance and not those
 | |
| which were derived from attribute declarations.  Applications which
 | |
| set this need to be especially careful to use what additional
 | |
| information is available from the declarations as needed to comply
 | |
| with the standards for the behavior of XML processors.  By default,
 | |
| this attribute is false; it may be changed at any time.
 | |
| \versionadded{2.1}
 | |
| \end{memberdesc}
 | |
| 
 | |
| The following attributes contain values relating to the most recent
 | |
| error encountered by an \class{xmlparser} object, and will only have
 | |
| correct values once a call to \method{Parse()} or \method{ParseFile()}
 | |
| has raised a \exception{xml.parsers.expat.ExpatError} exception.
 | |
| 
 | |
| \begin{memberdesc}[xmlparser]{ErrorByteIndex} 
 | |
| Byte index at which an error occurred.
 | |
| \end{memberdesc} 
 | |
| 
 | |
| \begin{memberdesc}[xmlparser]{ErrorCode} 
 | |
| Numeric code specifying the problem.  This value can be passed to the
 | |
| \function{ErrorString()} function, or compared to one of the constants
 | |
| defined in the \code{errors} object.
 | |
| \end{memberdesc}
 | |
| 
 | |
| \begin{memberdesc}[xmlparser]{ErrorColumnNumber} 
 | |
| Column number at which an error occurred.
 | |
| \end{memberdesc}
 | |
| 
 | |
| \begin{memberdesc}[xmlparser]{ErrorLineNumber}
 | |
| Line number at which an error occurred.
 | |
| \end{memberdesc}
 | |
| 
 | |
| Here is the list of handlers that can be set.  To set a handler on an
 | |
| \class{xmlparser} object \var{o}, use
 | |
| \code{\var{o}.\var{handlername} = \var{func}}.  \var{handlername} must
 | |
| be taken from the following list, and \var{func} must be a callable
 | |
| object accepting the correct number of arguments.  The arguments are
 | |
| all strings, unless otherwise stated.
 | |
| 
 | |
| \begin{methoddesc}[xmlparser]{XmlDeclHandler}{version, encoding, standalone}
 | |
| Called when the XML declaration is parsed.  The XML declaration is the
 | |
| (optional) declaration of the applicable version of the XML
 | |
| recommendation, the encoding of the document text, and an optional
 | |
| ``standalone'' declaration.  \var{version} and \var{encoding} will be
 | |
| strings of the type dictated by the \member{returns_unicode}
 | |
| attribute, and \var{standalone} will be \code{1} if the document is
 | |
| declared standalone, \code{0} if it is declared not to be standalone,
 | |
| or \code{-1} if the standalone clause was omitted.
 | |
| This is only available with Expat version 1.95.0 or newer.
 | |
| \versionadded{2.1}
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}[xmlparser]{StartDoctypeDeclHandler}{doctypeName,
 | |
|                                                        systemId, publicId,
 | |
|                                                        has_internal_subset}
 | |
| Called when Expat begins parsing the document type declaration
 | |
| (\code{<!DOCTYPE \ldots}).  The \var{doctypeName} is provided exactly
 | |
| as presented.  The \var{systemId} and \var{publicId} parameters give
 | |
| the system and public identifiers if specified, or \code{None} if
 | |
| omitted.  \var{has_internal_subset} will be true if the document
 | |
| contains an internal document declaration subset.
 | |
| This requires Expat version 1.2 or newer.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}[xmlparser]{EndDoctypeDeclHandler}{}
 | |
| Called when Expat is done parsing the document type declaration.
 | |
| This requires Expat version 1.2 or newer.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}[xmlparser]{ElementDeclHandler}{name, model}
 | |
| Called once for each element type declaration.  \var{name} is the name
 | |
| of the element type, and \var{model} is a representation of the
 | |
| content model.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}[xmlparser]{AttlistDeclHandler}{elname, attname,
 | |
|                                                   type, default, required}
 | |
| Called for each declared attribute for an element type.  If an
 | |
| attribute list declaration declares three attributes, this handler is
 | |
| called three times, once for each attribute.  \var{elname} is the name
 | |
| of the element to which the declaration applies and \var{attname} is
 | |
| the name of the attribute declared.  The attribute type is a string
 | |
| passed as \var{type}; the possible values are \code{'CDATA'},
 | |
| \code{'ID'}, \code{'IDREF'}, ...
 | |
| \var{default} gives the default value for the attribute used when the
 | |
| attribute is not specified by the document instance, or \code{None} if
 | |
| there is no default value (\code{\#IMPLIED} values).  If the attribute
 | |
| is required to be given in the document instance, \var{required} will
 | |
| be true.
 | |
| This requires Expat version 1.95.0 or newer.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}[xmlparser]{StartElementHandler}{name, attributes}
 | |
| Called for the start of every element.  \var{name} is a string
 | |
| containing the element name, and \var{attributes} is a dictionary
 | |
| mapping attribute names to their values.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}[xmlparser]{EndElementHandler}{name}
 | |
| Called for the end of every element.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}[xmlparser]{ProcessingInstructionHandler}{target, data}
 | |
| Called for every processing instruction.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}[xmlparser]{CharacterDataHandler}{data}
 | |
| Called for character data.  This will be called for normal character
 | |
| data, CDATA marked content, and ignorable whitespace.  Applications
 | |
| which must distinguish these cases can use the
 | |
| \member{StartCdataSectionHandler}, \member{EndCdataSectionHandler},
 | |
| and \member{ElementDeclHandler} callbacks to collect the required
 | |
| information.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}[xmlparser]{UnparsedEntityDeclHandler}{entityName, base,
 | |
|                                                          systemId, publicId,
 | |
|                                                          notationName}
 | |
| Called for unparsed (NDATA) entity declarations.  This is only present
 | |
| for version 1.2 of the Expat library; for more recent versions, use
 | |
| \member{EntityDeclHandler} instead.  (The underlying function in the
 | |
| Expat library has been declared obsolete.)
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}[xmlparser]{EntityDeclHandler}{entityName,
 | |
|                                                  is_parameter_entity, value,
 | |
|                                                  base, systemId,
 | |
|                                                  publicId,
 | |
|                                                  notationName}
 | |
| Called for all entity declarations.  For parameter and internal
 | |
| entities, \var{value} will be a string giving the declared contents
 | |
| of the entity; this will be \code{None} for external entities.  The
 | |
| \var{notationName} parameter will be \code{None} for parsed entities,
 | |
| and the name of the notation for unparsed entities.
 | |
| \var{is_parameter_entity} will be true if the entity is a parameter
 | |
| entity or false for general entities (most applications only need to
 | |
| be concerned with general entities).
 | |
| This is only available starting with version 1.95.0 of the Expat
 | |
| library.
 | |
| \versionadded{2.1}
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}[xmlparser]{NotationDeclHandler}{notationName, base,
 | |
|                                                    systemId, publicId}
 | |
| Called for notation declarations.  \var{notationName}, \var{base},
 | |
| \var{systemId}, and \var{publicId} are strings if given.  If the
 | |
| public identifier is omitted, \var{publicId} will be \code{None}.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}[xmlparser]{StartNamespaceDeclHandler}{prefix, uri}
 | |
| Called when an element contains a namespace declaration.  Namespace
 | |
| declarations are processed before the \member{StartElementHandler} is
 | |
| called for the element on which declarations are placed.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}[xmlparser]{EndNamespaceDeclHandler}{prefix}
 | |
| Called when the closing tag is reached for an element 
 | |
| that contained a namespace declaration.  This is called once for each
 | |
| namespace declaration on the element in the reverse of the order in
 | |
| which the \member{StartNamespaceDeclHandler} was called to indicate
 | |
| the start of each namespace declaration's scope.  Calls to this
 | |
| handler are made after the corresponding \member{EndElementHandler}
 | |
| for the end of the element.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}[xmlparser]{CommentHandler}{data}
 | |
| Called for comments.  \var{data} is the text of the comment, excluding
 | |
| the leading `\code{<!-}\code{-}' and trailing `\code{-}\code{->}'.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}[xmlparser]{StartCdataSectionHandler}{}
 | |
| Called at the start of a CDATA section.  This and
 | |
| \member{EndCdataSectionHandler} are needed to be able to identify
 | |
| the syntactical start and end for CDATA sections.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}[xmlparser]{EndCdataSectionHandler}{}
 | |
| Called at the end of a CDATA section.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}[xmlparser]{DefaultHandler}{data}
 | |
| Called for any characters in the XML document for
 | |
| which no applicable handler has been specified.  This means
 | |
| characters that are part of a construct which could be reported, but
 | |
| for which no handler has been supplied. 
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}[xmlparser]{DefaultHandlerExpand}{data}
 | |
| This is the same as the \member{DefaultHandler},
 | |
| but doesn't inhibit expansion of internal entities.
 | |
| The entity reference will not be passed to the default handler.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}[xmlparser]{NotStandaloneHandler}{} Called if the
 | |
| XML document hasn't been declared as being a standalone document.
 | |
| This happens when there is an external subset or a reference to a
 | |
| parameter entity, but the XML declaration does not set standalone to
 | |
| \code{yes}.  If this handler returns \code{0},
 | |
| then the parser will throw an \constant{XML_ERROR_NOT_STANDALONE}
 | |
| error.  If this handler is not set, no exception is raised by the
 | |
| parser for this condition.
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}[xmlparser]{ExternalEntityRefHandler}{context, base,
 | |
|                                                         systemId, publicId}
 | |
| Called for references to external entities.  \var{base} is the current
 | |
| base, as set by a previous call to \method{SetBase()}.  The public and
 | |
| system identifiers, \var{publicId} and \var{systemId}, are strings if
 | |
| given; if the public identifier is not given, \var{publicId} will be
 | |
| \code{None}.  The \var{context} value is opaque and should only be
 | |
| used as described below.
 | |
| 
 | |
| For external entities to be parsed, this handler must be implemented.
 | |
| It is responsible for creating the sub-parser using
 | |
| \code{ExternalEntityParserCreate(\var{context})}, initializing it with
 | |
| the appropriate callbacks, and parsing the entity.  This handler
 | |
| should return an integer; if it returns \code{0}, the parser will
 | |
| throw an \constant{XML_ERROR_EXTERNAL_ENTITY_HANDLING} error,
 | |
| otherwise parsing will continue.
 | |
| 
 | |
| If this handler is not provided, external entities are reported by the
 | |
| \member{DefaultHandler} callback, if provided.
 | |
| \end{methoddesc}
 | |
| 
 | |
| 
 | |
| \subsection{ExpatError Exceptions \label{expaterror-objects}}
 | |
| \sectionauthor{Fred L. Drake, Jr.}{fdrake@acm.org}
 | |
| 
 | |
| \exception{ExpatError} exceptions have a number of interesting
 | |
| attributes:
 | |
| 
 | |
| \begin{memberdesc}[ExpatError]{code}
 | |
|   Expat's internal error number for the specific error.  This will
 | |
|   match one of the constants defined in the \code{errors} object from
 | |
|   this module.
 | |
|   \versionadded{2.1}
 | |
| \end{memberdesc}
 | |
| 
 | |
| \begin{memberdesc}[ExpatError]{lineno}
 | |
|   Line number on which the error was detected.  The first line is
 | |
|   numbered \code{1}.
 | |
|   \versionadded{2.1}
 | |
| \end{memberdesc}
 | |
| 
 | |
| \begin{memberdesc}[ExpatError]{offset}
 | |
|   Character offset into the line where the error occurred.  The first
 | |
|   column is numbered \code{0}.
 | |
|   \versionadded{2.1}
 | |
| \end{memberdesc}
 | |
| 
 | |
| 
 | |
| \subsection{Example \label{expat-example}}
 | |
| 
 | |
| The following program defines three handlers that just print out their
 | |
| arguments.
 | |
| 
 | |
| \begin{verbatim}
 | |
| import xml.parsers.expat
 | |
| 
 | |
| # 3 handler functions
 | |
| def start_element(name, attrs):
 | |
|     print 'Start element:', name, attrs
 | |
| def end_element(name):
 | |
|     print 'End element:', name
 | |
| def char_data(data):
 | |
|     print 'Character data:', repr(data)
 | |
| 
 | |
| p = xml.parsers.expat.ParserCreate()
 | |
| 
 | |
| p.StartElementHandler = start_element
 | |
| p.EndElementHandler = end_element
 | |
| p.CharacterDataHandler = char_data
 | |
| 
 | |
| p.Parse("""<?xml version="1.0"?>
 | |
| <parent id="top"><child1 name="paul">Text goes here</child1>
 | |
| <child2 name="fred">More text</child2>
 | |
| </parent>""", 1)
 | |
| \end{verbatim}
 | |
| 
 | |
| The output from this program is:
 | |
| 
 | |
| \begin{verbatim}
 | |
| Start element: parent {'id': 'top'}
 | |
| Start element: child1 {'name': 'paul'}
 | |
| Character data: 'Text goes here'
 | |
| End element: child1
 | |
| Character data: '\n'
 | |
| Start element: child2 {'name': 'fred'}
 | |
| Character data: 'More text'
 | |
| End element: child2
 | |
| Character data: '\n'
 | |
| End element: parent
 | |
| \end{verbatim}
 | |
| 
 | |
| 
 | |
| \subsection{Content Model Descriptions \label{expat-content-models}}
 | |
| \sectionauthor{Fred L. Drake, Jr.}{fdrake@acm.org}
 | |
| 
 | |
| Content models are described using nested tuples.  Each tuple
 | |
| contains four values: the type, the quantifier, the name, and a tuple
 | |
| of children.  Children are simply additional content model
 | |
| descriptions.
 | |
| 
 | |
| The values of the first two fields are constants defined in the
 | |
| \code{model} object of the \module{xml.parsers.expat} module.  These
 | |
| constants can be collected in two groups: the model type group and the
 | |
| quantifier group.
 | |
| 
 | |
| The constants in the model type group are:
 | |
| 
 | |
| \begin{datadescni}{XML_CTYPE_ANY}
 | |
| The element named by the model name was declared to have a content
 | |
| model of \code{ANY}.
 | |
| \end{datadescni}
 | |
| 
 | |
| \begin{datadescni}{XML_CTYPE_CHOICE}
 | |
| The named element allows a choice from a number of options; this is
 | |
| used for content models such as \code{(A | B | C)}.
 | |
| \end{datadescni}
 | |
| 
 | |
| \begin{datadescni}{XML_CTYPE_EMPTY}
 | |
| Elements which are declared to be \code{EMPTY} have this model type.
 | |
| \end{datadescni}
 | |
| 
 | |
| \begin{datadescni}{XML_CTYPE_MIXED}
 | |
| \end{datadescni}
 | |
| 
 | |
| \begin{datadescni}{XML_CTYPE_NAME}
 | |
| \end{datadescni}
 | |
| 
 | |
| \begin{datadescni}{XML_CTYPE_SEQ}
 | |
| Models which represent a series of models which follow one after the
 | |
| other are indicated with this model type.  This is used for models
 | |
| such as \code{(A, B, C)}.
 | |
| \end{datadescni}
 | |
| 
 | |
| 
 | |
| The constants in the quantifier group are:
 | |
| 
 | |
| \begin{datadescni}{XML_CQUANT_NONE}
 | |
| No modifier is given, so it can appear exactly once, as for \code{A}.
 | |
| \end{datadescni}
 | |
| 
 | |
| \begin{datadescni}{XML_CQUANT_OPT}
 | |
| The model is optional: it can appear once or not at all, as for
 | |
| \code{A?}.
 | |
| \end{datadescni}
 | |
| 
 | |
| \begin{datadescni}{XML_CQUANT_PLUS}
 | |
| The model must occur one or more times (like \code{A+}).
 | |
| \end{datadescni}
 | |
| 
 | |
| \begin{datadescni}{XML_CQUANT_REP}
 | |
| The model must occur zero or more times, as for \code{A*}.
 | |
| \end{datadescni}
 | |
| 
 | |
| 
 | |
| \subsection{Expat error constants \label{expat-errors}}
 | |
| 
 | |
| The following constants are provided in the \code{errors} object of
 | |
| the \refmodule{xml.parsers.expat} module.  These constants are useful
 | |
| in interpreting some of the attributes of the \exception{ExpatError}
 | |
| exception objects raised when an error has occurred.
 | |
| 
 | |
| The \code{errors} object has the following attributes:
 | |
| 
 | |
| \begin{datadescni}{XML_ERROR_ASYNC_ENTITY}
 | |
| \end{datadescni}
 | |
| 
 | |
| \begin{datadescni}{XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF}
 | |
| An entity reference in an attribute value referred to an external
 | |
| entity instead of an internal entity.
 | |
| \end{datadescni}
 | |
| 
 | |
| \begin{datadescni}{XML_ERROR_BAD_CHAR_REF}
 | |
| A character reference referred to a character which is illegal in XML
 | |
| (for example, character \code{0}, or `\code{\&\#0;}').
 | |
| \end{datadescni}
 | |
| 
 | |
| \begin{datadescni}{XML_ERROR_BINARY_ENTITY_REF}
 | |
| An entity reference referred to an entity which was declared with a
 | |
| notation, so cannot be parsed.
 | |
| \end{datadescni}
 | |
| 
 | |
| \begin{datadescni}{XML_ERROR_DUPLICATE_ATTRIBUTE}
 | |
| An attribute was used more than once in a start tag.
 | |
| \end{datadescni}
 | |
| 
 | |
| \begin{datadescni}{XML_ERROR_INCORRECT_ENCODING}
 | |
| \end{datadescni}
 | |
| 
 | |
| \begin{datadescni}{XML_ERROR_INVALID_TOKEN}
 | |
| Raised when an input byte could not properly be assigned to a
 | |
| character; for example, a NUL byte (value \code{0}) in a UTF-8 input
 | |
| stream.
 | |
| \end{datadescni}
 | |
| 
 | |
| \begin{datadescni}{XML_ERROR_JUNK_AFTER_DOC_ELEMENT}
 | |
| Something other than whitespace occurred after the document element.
 | |
| \end{datadescni}
 | |
| 
 | |
| \begin{datadescni}{XML_ERROR_MISPLACED_XML_PI}
 | |
| An XML declaration was found somewhere other than the start of the
 | |
| input data.
 | |
| \end{datadescni}
 | |
| 
 | |
| \begin{datadescni}{XML_ERROR_NO_ELEMENTS}
 | |
| The document contains no elements (XML requires all documents to
 | |
| contain exactly one top-level element).
 | |
| \end{datadescni}
 | |
| 
 | |
| \begin{datadescni}{XML_ERROR_NO_MEMORY}
 | |
| Expat was not able to allocate memory internally.
 | |
| \end{datadescni}
 | |
| 
 | |
| \begin{datadescni}{XML_ERROR_PARAM_ENTITY_REF}
 | |
| A parameter entity reference was found where it was not allowed.
 | |
| \end{datadescni}
 | |
| 
 | |
| \begin{datadescni}{XML_ERROR_PARTIAL_CHAR}
 | |
| 
 | |
| \end{datadescni}
 | |
| 
 | |
| \begin{datadescni}{XML_ERROR_RECURSIVE_ENTITY_REF}
 | |
| An entity reference contained another reference to the same entity;
 | |
| possibly via a different name, and possibly indirectly.
 | |
| \end{datadescni}
 | |
| 
 | |
| \begin{datadescni}{XML_ERROR_SYNTAX}
 | |
| Some unspecified syntax error was encountered.
 | |
| \end{datadescni}
 | |
| 
 | |
| \begin{datadescni}{XML_ERROR_TAG_MISMATCH}
 | |
| An end tag did not match the innermost open start tag.
 | |
| \end{datadescni}
 | |
| 
 | |
| \begin{datadescni}{XML_ERROR_UNCLOSED_TOKEN}
 | |
| Some token (such as a start tag) was not closed before the end of the
 | |
| stream or the next token was encountered.
 | |
| \end{datadescni}
 | |
| 
 | |
| \begin{datadescni}{XML_ERROR_UNDEFINED_ENTITY}
 | |
| A reference was made to an entity which was not defined.
 | |
| \end{datadescni}
 | |
| 
 | |
| \begin{datadescni}{XML_ERROR_UNKNOWN_ENCODING}
 | |
| The document encoding is not supported by Expat.
 | |
| \end{datadescni}
 | 
