mirror of
				https://github.com/python/cpython.git
				synced 2025-10-30 21:21:22 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			85 lines
		
	
	
	
		
			3 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
			
		
		
	
	
			85 lines
		
	
	
	
		
			3 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
| % Module and documentation by Eric S. Raymond, 21 Dec 1998 
 | |
| 
 | |
| \section{\module{shlex} ---
 | |
|          Simple lexical analysis.}
 | |
| 
 | |
| \declaremodule{standard}{shlex}
 | |
| \modulesynopsis{Simple lexical analysis for \UNIX{} shell-like languages.}
 | |
| \moduleauthor{Eric S. Raymond}{esr@snark.thyrsus.com}
 | |
| \sectionauthor{Eric S. Raymond}{esr@snark.thyrsus.com}
 | |
| 
 | |
| \versionadded{1.5.2}
 | |
| 
 | |
| The \class{shlex} class makes it easy to write lexical analyzers for
 | |
| simple syntaxes resembling that of the \UNIX{} shell.  This will often
 | |
| be useful for writing minilanguages, e.g.\ in run control files for
 | |
| Python applications.
 | |
| 
 | |
| \begin{classdesc}{shlex}{\optional{stream}}
 | |
| A \class{shlex} instance or subclass instance is a lexical analyzer
 | |
| object.  The initialization argument, if present, specifies where to
 | |
| read characters from. It must be a file- or stream-like object with
 | |
| \method{read()} and \method{readline()} methods.  If no argument is given,
 | |
| input will be taken from \code{sys.stdin}.
 | |
| 
 | |
| \end{classdesc}
 | |
| 
 | |
| \subsection{shlex Objects \label{shlex-objects}}
 | |
| 
 | |
| A \class{shlex} instance has the following methods:
 | |
| 
 | |
| \begin{methoddesc}{get_token}{}
 | |
| Return a token.  If tokens have been stacked using
 | |
| \method{push_token()}, pop a token off the stack.  Otherwise, read one
 | |
| from the input stream.  If reading encounters an immediate
 | |
| end-of-file, an empty string is returned. 
 | |
| \end{methoddesc}
 | |
| 
 | |
| \begin{methoddesc}{push_token}{str}
 | |
| Push the argument onto the token stack.
 | |
| \end{methoddesc}
 | |
| 
 | |
| Instances of \class{shlex} and its subclasses have some public instance
 | |
| variables which either control lexical analysis or can be used
 | |
| for debugging:
 | |
| 
 | |
| \begin{memberdesc}{commenters}
 | |
| The string of characters that are recognized as comment beginners.
 | |
| All characters from the comment beginner to end of line are ignored.
 | |
| Includes just \character{\#} by default.   
 | |
| \end{memberdesc}
 | |
| 
 | |
| \begin{memberdesc}{wordchars}
 | |
| The string of characters that will accumulate into multi-character
 | |
| tokens. By default, includes all \ASCII{} alphanumerics and
 | |
| underscore.
 | |
| \end{memberdesc}
 | |
| 
 | |
| \begin{memberdesc}{whitespace}
 | |
| Characters that will be considered whitespace and skipped.  Whitespace
 | |
| bounds tokens.  By default, includes space, tab, linefeed and
 | |
| carriage-return.
 | |
| \end{memberdesc}
 | |
| 
 | |
| \begin{memberdesc}{quotes}
 | |
| Characters that will be considered string quotes.  The token
 | |
| accumulates until the same quote is encountered again (thus, different
 | |
| quote types protect each other as in the shell).  By default, includes
 | |
| \ASCII{} single and double quotes.
 | |
| \end{memberdesc}
 | |
| 
 | |
| Note that any character not declared to be a word character, whitespace,
 | |
| a quote, or a comment beginner will be returned as a single-character token.
 | |
| 
 | |
| Quote and comment characters are not recognized within words.  Thus,
 | |
| the bare words \samp{ain't} and \samp{ain\#t} would be returned as single
 | |
| tokens by the default parser.
 | |
| 
 | |
| \begin{memberdesc}{lineno}
 | |
| Source line number (count of newlines seen so far plus one).
 | |
| \end{memberdesc}
 | |
| 
 | |
| \begin{memberdesc}{token}
 | |
| The token buffer.  It may be useful to examine this when catching
 | |
| exceptions.
 | |
| \end{memberdesc}
 | 
