1998-08-10 19:42:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\section{\module{regsub} ---
							 | 
						
					
						
							
								
									
										
										
										
											1999-04-22 21:23:22 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								         String operations using regular expressions}
							 | 
						
					
						
							
								
									
										
										
										
											1998-07-23 17:59:49 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											1999-04-22 21:23:22 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\declaremodule{standard}{regsub}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\modulesynopsis{Substitution and splitting operations that use
							 | 
						
					
						
							
								
									
										
										
										
											2000-09-25 17:23:04 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                regular expressions.  \strong{Obsolete!}}
							 | 
						
					
						
							
								
									
										
										
										
											1998-07-23 17:59:49 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											1998-04-09 14:03:00 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											1994-01-02 01:22:07 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								This module defines a number of functions useful for working with
							 | 
						
					
						
							
								
									
										
										
										
											1999-04-22 21:23:22 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								regular expressions (see built-in module \refmodule{regex}).
							 | 
						
					
						
							
								
									
										
										
										
											1994-01-02 01:22:07 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											1996-06-26 19:24:22 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								Warning: these functions are not thread-safe.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											1997-12-30 04:54:47 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\strong{Obsolescence note:}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								This module is obsolete as of Python version 1.5; it is still being
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								maintained because much existing code still uses it.  All new code in
							 | 
						
					
						
							
								
									
										
										
										
											1999-04-22 21:23:22 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								need of regular expressions should use the new \refmodule{re} module, which
							 | 
						
					
						
							
								
									
										
										
										
											1997-12-30 04:54:47 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								supports the more powerful and regular Perl-style regular expressions.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Existing code should be converted.  The standard library module
							 | 
						
					
						
							
								
									
										
										
										
											1999-04-22 21:23:22 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\module{reconvert} helps in converting \refmodule{regex} style regular
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								expressions to \refmodule{re} style regular expressions.  (For more
							 | 
						
					
						
							
								
									
										
										
										
											1998-04-09 14:03:00 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								conversion help, see Andrew Kuchling's\index{Kuchling, Andrew}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								``regex-to-re HOWTO'' at
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\url{http://www.python.org/doc/howto/regex-to-re/}.)
							 | 
						
					
						
							
								
									
										
										
										
											1997-12-30 04:54:47 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											1996-08-09 21:43:21 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											1998-03-17 06:33:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\begin{funcdesc}{sub}{pat, repl, str}
							 | 
						
					
						
							
								
									
										
										
										
											1994-01-02 01:22:07 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Replace the first occurrence of pattern \var{pat} in string
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\var{str} by replacement \var{repl}.  If the pattern isn't found,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								the string is returned unchanged.  The pattern may be a string or an
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								already compiled pattern.  The replacement may contain references
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\samp{\e \var{digit}} to subpatterns and escaped backslashes.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\end{funcdesc}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											1998-03-17 06:33:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\begin{funcdesc}{gsub}{pat, repl, str}
							 | 
						
					
						
							
								
									
										
										
										
											1994-01-02 01:22:07 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Replace all (non-overlapping) occurrences of pattern \var{pat} in
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								string \var{str} by replacement \var{repl}.  The same rules as for
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\code{sub()} apply.  Empty matches for the pattern are replaced only
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								when not adjacent to a previous match, so e.g.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\code{gsub('', '-', 'abc')} returns \code{'-a-b-c-'}.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\end{funcdesc}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											1998-03-17 06:33:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\begin{funcdesc}{split}{str, pat\optional{, maxsplit}}
							 | 
						
					
						
							
								
									
										
										
										
											1994-01-02 01:22:07 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Split the string \var{str} in fields separated by delimiters matching
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								the pattern \var{pat}, and return a list containing the fields.  Only
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								non-empty matches for the pattern are considered, so e.g.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\code{split('a:b', ':*')} returns \code{['a', 'b']} and
							 | 
						
					
						
							
								
									
										
										
										
											1996-08-09 21:43:21 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\code{split('abc', '')} returns \code{['abc']}.  The \var{maxsplit}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								defaults to 0. If it is nonzero, only \var{maxsplit} number of splits
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								occur, and the remainder of the string is returned as the final
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								element of the list.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\end{funcdesc}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											1998-03-17 06:33:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\begin{funcdesc}{splitx}{str, pat\optional{, maxsplit}}
							 | 
						
					
						
							
								
									
										
										
										
											1996-08-09 21:43:21 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								Split the string \var{str} in fields separated by delimiters matching
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								the pattern \var{pat}, and return a list containing the fields as well
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								as the separators.  For example, \code{splitx('a:::b', ':*')} returns
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\code{['a', ':::', 'b']}.  Otherwise, this function behaves the same
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								as \code{split}.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\end{funcdesc}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											1998-03-17 06:33:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\begin{funcdesc}{capwords}{s\optional{, pat}}
							 | 
						
					
						
							
								
									
										
										
										
											1996-08-09 21:43:21 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								Capitalize words separated by optional pattern \var{pat}.  The default
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								pattern uses any characters except letters, digits and underscores as
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								word delimiters.  Capitalization is done by changing the first
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								character of each word to upper case.
							 | 
						
					
						
							
								
									
										
										
										
											1994-01-02 01:22:07 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\end{funcdesc}
							 | 
						
					
						
							
								
									
										
										
										
											1997-02-18 18:59:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\begin{funcdesc}{clear_cache}{}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								The regsub module maintains a cache of compiled regular expressions,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								keyed on the regular expression string and the syntax of the regex
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								module at the time the expression was compiled.  This function clears
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								that cache.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\end{funcdesc}
							 |