1998-08-10 19:42:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\section{\module{htmllib} ---
							 | 
						
					
						
							
								
									
										
										
										
											1999-04-22 18:25:47 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								         A parser for HTML documents}
							 | 
						
					
						
							
								
									
										
										
										
											1998-07-23 17:59:49 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											1999-04-22 18:25:47 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\declaremodule{standard}{htmllib}
							 | 
						
					
						
							
								
									
										
										
										
											1998-07-23 17:59:49 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\modulesynopsis{A parser for HTML documents.}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											1995-02-28 17:14:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\index{HTML}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\index{hypertext}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											1996-10-08 21:52:23 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								This module defines a class which can serve as a base for parsing text
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								files formatted in the HyperText Mark-up Language (HTML).  The class
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								is not directly concerned with I/O --- it must be provided with input
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								in string form via a method, and makes calls to methods of a
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								``formatter'' object in order to produce output.  The
							 | 
						
					
						
							
								
									
										
										
										
											1998-02-10 21:42:27 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\class{HTMLParser} class is designed to be used as a base class for
							 | 
						
					
						
							
								
									
										
										
										
											1996-10-08 21:52:23 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								other classes in order to add functionality, and allows most of its
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								methods to be extended or overridden.  In turn, this class is derived
							 | 
						
					
						
							
								
									
										
										
										
											1998-02-10 21:42:27 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from and extends the \class{SGMLParser} class defined in module
							 | 
						
					
						
							
								
									
										
										
										
											1999-04-22 18:25:47 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\refmodule{sgmllib}\refstmodindex{sgmllib}.  The \class{HTMLParser}
							 | 
						
					
						
							
								
									
										
										
										
											1998-02-10 21:42:27 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								implementation supports the HTML 2.0 language as described in
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\rfc{1866}.  Two implementations of formatter objects are provided in
							 | 
						
					
						
							
								
									
										
										
										
											1999-04-22 18:25:47 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								the \refmodule{formatter}\refstmodindex{formatter} module; refer to the
							 | 
						
					
						
							
								
									
										
										
										
											1998-02-10 21:42:27 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								documentation for that module for information on the formatter
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								interface.
							 | 
						
					
						
							
								
									
										
										
										
											1998-03-12 14:39:09 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\withsubitem{(in module sgmllib)}{\ttindex{SGMLParser}}
							 | 
						
					
						
							
								
									
										
										
										
											1995-02-28 17:14:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								The following is a summary of the interface defined by
							 | 
						
					
						
							
								
									
										
										
										
											1998-02-10 21:42:27 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\class{sgmllib.SGMLParser}:
							 | 
						
					
						
							
								
									
										
										
										
											1995-02-28 17:14:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\begin{itemize}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\item
							 | 
						
					
						
							
								
									
										
										
										
											1998-02-10 21:42:27 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								The interface to feed data to an instance is through the \method{feed()}
							 | 
						
					
						
							
								
									
										
										
										
											1995-02-28 17:14:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								method, which takes a string argument.  This can be called with as
							 | 
						
					
						
							
								
									
										
										
										
											1998-02-10 21:42:27 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								little or as much text at a time as desired; \samp{p.feed(a);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								p.feed(b)} has the same effect as \samp{p.feed(a+b)}.  When the data
							 | 
						
					
						
							
								
									
										
										
										
											1996-10-08 21:52:23 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								contains complete HTML tags, these are processed immediately;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								incomplete elements are saved in a buffer.  To force processing of all
							 | 
						
					
						
							
								
									
										
										
										
											1998-02-10 21:42:27 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								unprocessed data, call the \method{close()} method.
							 | 
						
					
						
							
								
									
										
										
										
											1996-10-08 21:52:23 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								For example, to parse the entire contents of a file, use:
							 | 
						
					
						
							
								
									
										
										
										
											1998-02-13 06:58:54 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\begin{verbatim}
							 | 
						
					
						
							
								
									
										
										
										
											1996-10-08 21:52:23 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								parser.feed(open('myfile.html').read())
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								parser.close()
							 | 
						
					
						
							
								
									
										
										
										
											1998-02-13 06:58:54 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\end{verbatim}
							 | 
						
					
						
							
								
									
										
										
										
											1998-03-12 14:39:09 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											1995-02-28 17:14:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\item
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								The interface to define semantics for HTML tags is very simple: derive
							 | 
						
					
						
							
								
									
										
										
										
											1999-04-22 18:25:47 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								a class and define methods called \method{start_\var{tag}()},
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\method{end_\var{tag}()}, or \method{do_\var{tag}()}.  The parser will
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								call these at appropriate moments: \method{start_\var{tag}} or
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\method{do_\var{tag}()} is called when an opening tag of the form
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\code{<\var{tag} ...>} is encountered; \method{end_\var{tag}()} is called
							 | 
						
					
						
							
								
									
										
										
										
											1995-02-28 17:14:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								when a closing tag of the form \code{<\var{tag}>} is encountered.  If
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								an opening tag requires a corresponding closing tag, like \code{<H1>}
							 | 
						
					
						
							
								
									
										
										
										
											1999-04-22 18:25:47 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								... \code{</H1>}, the class should define the \method{start_\var{tag}()}
							 | 
						
					
						
							
								
									
										
										
										
											1995-02-28 17:14:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								method; if a tag requires no closing tag, like \code{<P>}, the class
							 | 
						
					
						
							
								
									
										
										
										
											1999-04-22 18:25:47 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								should define the \method{do_\var{tag}()} method.
							 | 
						
					
						
							
								
									
										
										
										
											1995-02-28 17:14:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\end{itemize}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											1996-10-08 21:52:23 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								The module defines a single class:
							 | 
						
					
						
							
								
									
										
										
										
											1995-02-28 17:14:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											1998-03-12 14:39:09 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\begin{classdesc}{HTMLParser}{formatter}
							 | 
						
					
						
							
								
									
										
										
										
											1996-10-08 21:52:23 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								This is the basic HTML parser class.  It supports all entity names
							 | 
						
					
						
							
								
									
										
										
										
											1998-02-09 19:16:20 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								required by the HTML 2.0 specification (\rfc{1866}).  It also defines
							 | 
						
					
						
							
								
									
										
										
										
											1996-10-08 21:52:23 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								handlers for all HTML 2.0 and many HTML 3.0 and 3.2 elements.
							 | 
						
					
						
							
								
									
										
										
										
											1998-03-12 14:39:09 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\end{classdesc}
							 | 
						
					
						
							
								
									
										
										
										
											1995-02-28 17:14:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											1999-06-21 21:20:56 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\begin{seealso}
							 | 
						
					
						
							
								
									
										
										
										
											2001-07-05 16:34:36 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  \seemodule{HTMLParser}{Alternate HTML parser that offers a slightly
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                         lower-level view of the input, but is
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                         designed to work with XHTML, and does not
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                         implement some of the SGML syntax not used in
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                         ``HTML as deployed'' and which isn't legal
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                         for XHTML.}
							 | 
						
					
						
							
								
									
										
										
										
											1999-06-21 21:20:56 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  \seemodule{htmlentitydefs}{Definition of replacement text for HTML
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                             2.0 entities.}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  \seemodule{sgmllib}{Base class for \class{HTMLParser}.}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\end{seealso}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\subsection{HTMLParser Objects \label{html-parser-objects}}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											1998-02-10 21:42:27 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								In addition to tag methods, the \class{HTMLParser} class provides some
							 | 
						
					
						
							
								
									
										
										
										
											1996-10-08 21:52:23 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								additional methods and instance variables for use within tag methods.
							 | 
						
					
						
							
								
									
										
										
										
											1995-02-28 17:14:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											1998-03-27 05:27:08 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\begin{memberdesc}{formatter}
							 | 
						
					
						
							
								
									
										
										
										
											1996-10-08 21:52:23 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								This is the formatter instance associated with the parser.
							 | 
						
					
						
							
								
									
										
										
										
											1998-03-27 05:27:08 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\end{memberdesc}
							 | 
						
					
						
							
								
									
										
										
										
											1995-02-28 17:14:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											1998-03-27 05:27:08 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\begin{memberdesc}{nofill}
							 | 
						
					
						
							
								
									
										
										
										
											1996-10-08 21:52:23 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								Boolean flag which should be true when whitespace should not be
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								collapsed, or false when it should be.  In general, this should only
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								be true when character data is to be treated as ``preformatted'' text,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								as within a \code{<PRE>} element.  The default value is false.  This
							 | 
						
					
						
							
								
									
										
										
										
											1998-02-10 21:42:27 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								affects the operation of \method{handle_data()} and \method{save_end()}.
							 | 
						
					
						
							
								
									
										
										
										
											1998-03-27 05:27:08 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\end{memberdesc}
							 | 
						
					
						
							
								
									
										
										
										
											1995-02-28 17:14:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											1998-02-10 21:42:27 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											1998-03-27 05:27:08 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\begin{methoddesc}{anchor_bgn}{href, name, type}
							 | 
						
					
						
							
								
									
										
										
										
											1996-10-08 21:52:23 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								This method is called at the start of an anchor region.  The arguments
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								correspond to the attributes of the \code{<A>} tag with the same
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								names.  The default implementation maintains a list of hyperlinks
							 | 
						
					
						
							
								
									
										
										
										
											1999-04-22 18:25:47 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								(defined by the \code{HREF} attribute for \code{<A>} tags) within the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								document.  The list of hyperlinks is available as the data attribute
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\member{anchorlist}.
							 | 
						
					
						
							
								
									
										
										
										
											1998-03-27 05:27:08 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\end{methoddesc}
							 | 
						
					
						
							
								
									
										
										
										
											1995-02-28 17:14:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											1998-03-27 05:27:08 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\begin{methoddesc}{anchor_end}{}
							 | 
						
					
						
							
								
									
										
										
										
											1996-10-08 21:52:23 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								This method is called at the end of an anchor region.  The default
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								implementation adds a textual footnote marker using an index into the
							 | 
						
					
						
							
								
									
										
										
										
											1998-02-10 21:42:27 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								list of hyperlinks created by \method{anchor_bgn()}.
							 | 
						
					
						
							
								
									
										
										
										
											1998-03-27 05:27:08 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\end{methoddesc}
							 | 
						
					
						
							
								
									
										
										
										
											1995-02-28 17:14:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											1998-03-27 05:27:08 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\begin{methoddesc}{handle_image}{source, alt\optional{, ismap\optional{, align\optional{, width\optional{, height}}}}}
							 | 
						
					
						
							
								
									
										
										
										
											1996-10-08 21:52:23 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								This method is called to handle images.  The default implementation
							 | 
						
					
						
							
								
									
										
										
										
											1998-02-10 21:42:27 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								simply passes the \var{alt} value to the \method{handle_data()}
							 | 
						
					
						
							
								
									
										
										
										
											1996-10-08 21:52:23 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								method.
							 | 
						
					
						
							
								
									
										
										
										
											1998-03-27 05:27:08 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\end{methoddesc}
							 | 
						
					
						
							
								
									
										
										
										
											1995-02-28 17:14:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											1998-03-27 05:27:08 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\begin{methoddesc}{save_bgn}{}
							 | 
						
					
						
							
								
									
										
										
										
											1996-10-08 21:52:23 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								Begins saving character data in a buffer instead of sending it to the
							 | 
						
					
						
							
								
									
										
										
										
											1998-02-10 21:42:27 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								formatter object.  Retrieve the stored data via \method{save_end()}.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Use of the \method{save_bgn()} / \method{save_end()} pair may not be
							 | 
						
					
						
							
								
									
										
										
										
											1996-10-08 21:52:23 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								nested.
							 | 
						
					
						
							
								
									
										
										
										
											1998-03-27 05:27:08 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\end{methoddesc}
							 | 
						
					
						
							
								
									
										
										
										
											1995-02-28 17:14:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											1998-03-27 05:27:08 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\begin{methoddesc}{save_end}{}
							 | 
						
					
						
							
								
									
										
										
										
											1996-10-08 21:52:23 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								Ends buffering character data and returns all data saved since the
							 | 
						
					
						
							
								
									
										
										
										
											2000-07-16 19:01:10 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								preceding call to \method{save_bgn()}.  If the \member{nofill} flag is
							 | 
						
					
						
							
								
									
										
										
										
											1998-02-10 21:42:27 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								false, whitespace is collapsed to single spaces.  A call to this
							 | 
						
					
						
							
								
									
										
										
										
											2000-07-16 19:01:10 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								method without a preceding call to \method{save_bgn()} will raise a
							 | 
						
					
						
							
								
									
										
										
										
											1998-02-10 21:42:27 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\exception{TypeError} exception.
							 | 
						
					
						
							
								
									
										
										
										
											1998-03-27 05:27:08 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\end{methoddesc}
							 | 
						
					
						
							
								
									
										
										
										
											1999-06-21 21:20:56 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\section{\module{htmlentitydefs} ---
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								         Definitions of HTML general entities}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\declaremodule{standard}{htmlentitydefs}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\modulesynopsis{Definitions of HTML general entities.}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\sectionauthor{Fred L. Drake, Jr.}{fdrake@acm.org}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								This module defines a single dictionary, \code{entitydefs}, which is
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								used by the \refmodule{htmllib} module to provide the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\member{entitydefs} member of the \class{HTMLParser} class.  The
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								definition provided here contains all the entities defined by HTML 2.0 
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								that can be handled using simple textual substitution in the Latin-1
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								character set (ISO-8859-1).
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\begin{datadesc}{entitydefs}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  A dictionary mapping HTML 2.0 entity definitions to their
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  replacement text in ISO Latin-1.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\end{datadesc}
							 |