| 
									
										
										
										
											1998-08-10 19:42:37 +00:00
										 |  |  | \section{\module{regsub} --- | 
					
						
							|  |  |  |          Substitution and splitting operations that use regular expressions.} | 
					
						
							| 
									
										
										
										
											1998-07-23 17:59:49 +00:00
										 |  |  | \declaremodule{standard}{regsub} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | \modulesynopsis{Substitution and splitting operations that use regular expressions.} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-04-09 14:03:00 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1994-01-02 01:22:07 +00:00
										 |  |  | This module defines a number of functions useful for working with | 
					
						
							|  |  |  | regular expressions (see built-in module \module{regex}). | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1996-06-26 19:24:22 +00:00
										 |  |  | Warning: these functions are not thread-safe. | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1997-12-30 04:54:47 +00:00
										 |  |  | \strong{Obsolescence note:} | 
					
						
							|  |  |  | This module is obsolete as of Python version 1.5; it is still being | 
					
						
							|  |  |  | maintained because much existing code still uses it.  All new code in | 
					
						
							| 
									
										
										
										
											1998-01-22 20:47:26 +00:00
										 |  |  | need of regular expressions should use the new \module{re} module, which | 
					
						
							| 
									
										
										
										
											1997-12-30 04:54:47 +00:00
										 |  |  | supports the more powerful and regular Perl-style regular expressions. | 
					
						
							|  |  |  | Existing code should be converted.  The standard library module | 
					
						
							| 
									
										
										
										
											1998-01-22 20:47:26 +00:00
										 |  |  | \module{reconvert} helps in converting \code{regex} style regular | 
					
						
							|  |  |  | expressions to \module{re} style regular expressions.  (For more | 
					
						
							| 
									
										
										
										
											1998-04-09 14:03:00 +00:00
										 |  |  | conversion help, see Andrew Kuchling's\index{Kuchling, Andrew} | 
					
						
							|  |  |  | ``regex-to-re HOWTO'' at | 
					
						
							|  |  |  | \url{http://www.python.org/doc/howto/regex-to-re/}.) | 
					
						
							| 
									
										
										
										
											1997-12-30 04:54:47 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1996-08-09 21:43:21 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-03-17 06:33:25 +00:00
										 |  |  | \begin{funcdesc}{sub}{pat, repl, str} | 
					
						
							| 
									
										
										
										
											1994-01-02 01:22:07 +00:00
										 |  |  | Replace the first occurrence of pattern \var{pat} in string | 
					
						
							|  |  |  | \var{str} by replacement \var{repl}.  If the pattern isn't found, | 
					
						
							|  |  |  | the string is returned unchanged.  The pattern may be a string or an | 
					
						
							|  |  |  | already compiled pattern.  The replacement may contain references | 
					
						
							|  |  |  | \samp{\e \var{digit}} to subpatterns and escaped backslashes. | 
					
						
							|  |  |  | \end{funcdesc} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-03-17 06:33:25 +00:00
										 |  |  | \begin{funcdesc}{gsub}{pat, repl, str} | 
					
						
							| 
									
										
										
										
											1994-01-02 01:22:07 +00:00
										 |  |  | Replace all (non-overlapping) occurrences of pattern \var{pat} in | 
					
						
							|  |  |  | string \var{str} by replacement \var{repl}.  The same rules as for | 
					
						
							|  |  |  | \code{sub()} apply.  Empty matches for the pattern are replaced only | 
					
						
							|  |  |  | when not adjacent to a previous match, so e.g. | 
					
						
							|  |  |  | \code{gsub('', '-', 'abc')} returns \code{'-a-b-c-'}. | 
					
						
							|  |  |  | \end{funcdesc} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-03-17 06:33:25 +00:00
										 |  |  | \begin{funcdesc}{split}{str, pat\optional{, maxsplit}} | 
					
						
							| 
									
										
										
										
											1994-01-02 01:22:07 +00:00
										 |  |  | Split the string \var{str} into fields separated by delimiters matching | 
					
						
							|  |  |  | the pattern \var{pat}, and return a list containing the fields.  Only | 
					
						
							|  |  |  | non-empty matches for the pattern are considered, so e.g. | 
					
						
							|  |  |  | \code{split('a:b', ':*')} returns \code{['a', 'b']} and | 
					
						
							| 
									
										
										
										
											1996-08-09 21:43:21 +00:00
										 |  |  | \code{split('abc', '')} returns \code{['abc']}.  The \var{maxsplit} | 
					
						
							|  |  |  | defaults to 0.  If it is nonzero, at most \var{maxsplit} splits | 
					
						
							|  |  |  | occur, and the remainder of the string is returned as the final | 
					
						
							|  |  |  | element of the list. | 
					
						
							|  |  |  | \end{funcdesc} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-03-17 06:33:25 +00:00
										 |  |  | \begin{funcdesc}{splitx}{str, pat\optional{, maxsplit}} | 
					
						
							| 
									
										
										
										
											1996-08-09 21:43:21 +00:00
										 |  |  | Split the string \var{str} into fields separated by delimiters matching | 
					
						
							|  |  |  | the pattern \var{pat}, and return a list containing the fields as well | 
					
						
							|  |  |  | as the separators.  For example, \code{splitx('a:::b', ':*')} returns | 
					
						
							|  |  |  | \code{['a', ':::', 'b']}.  Otherwise, this function behaves the same | 
					
						
							|  |  |  | as \code{split()}. | 
					
						
							|  |  |  | \end{funcdesc} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1998-03-17 06:33:25 +00:00
										 |  |  | \begin{funcdesc}{capwords}{s\optional{, pat}} | 
					
						
							| 
									
										
										
										
											1996-08-09 21:43:21 +00:00
										 |  |  | Capitalize words separated by optional pattern \var{pat}.  The default | 
					
						
							|  |  |  | pattern uses any characters except letters, digits and underscores as | 
					
						
							|  |  |  | word delimiters.  Capitalization is done by changing the first | 
					
						
							|  |  |  | character of each word to upper case. | 
					
						
							| 
									
										
										
										
											1994-01-02 01:22:07 +00:00
										 |  |  | \end{funcdesc} | 
					
						
							| 
									
										
										
										
											1997-02-18 18:59:37 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | \begin{funcdesc}{clear_cache}{} | 
					
						
							|  |  |  | The \module{regsub} module maintains a cache of compiled regular expressions, | 
					
						
							|  |  |  | keyed on the regular expression string and the syntax of the regex | 
					
						
							|  |  |  | module at the time the expression was compiled.  This function clears | 
					
						
							|  |  |  | that cache. | 
					
						
							|  |  |  | \end{funcdesc} |