mirror of
				https://github.com/python/cpython.git
				synced 2025-10-30 21:21:22 +00:00 
			
		
		
		
	AMK's latest; plus three null bytes that I added for purify
This commit is contained in:
		
							parent
							
								
									e4eb2231fd
								
							
						
					
					
						commit
						58132c6799
					
				
					 4 changed files with 157 additions and 167 deletions
				
			
		|  | @ -3,7 +3,7 @@ | ||||||
| *************************************************/ | *************************************************/ | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| #define PCRE_VERSION       "1.01 19-Nov-1997" | #define PCRE_VERSION       "1.02 12-Dec-1997" | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| /* This is a library of functions to support regular expressions whose syntax
 | /* This is a library of functions to support regular expressions whose syntax
 | ||||||
|  | @ -114,7 +114,7 @@ enum { ESC_A = 1, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W, ESC_w, | ||||||
| 
 | 
 | ||||||
| /* Opcode table: OP_BRA must be last, as all values >= it are used for brackets
 | /* Opcode table: OP_BRA must be last, as all values >= it are used for brackets
 | ||||||
| that extract substrings. Starting from 1 (i.e. after OP_END), the values up to | that extract substrings. Starting from 1 (i.e. after OP_END), the values up to | ||||||
| OP_EOL must correspond in order to the list of escapes immediately above. */ | OP_EOD must correspond in order to the list of escapes immediately above. */ | ||||||
| 
 | 
 | ||||||
| enum { | enum { | ||||||
|   OP_END,            /* End of pattern */ |   OP_END,            /* End of pattern */ | ||||||
|  | @ -131,8 +131,7 @@ enum { | ||||||
|   OP_NOT_WORDCHAR,       /* \W */ |   OP_NOT_WORDCHAR,       /* \W */ | ||||||
|   OP_WORDCHAR,           /* \w */ |   OP_WORDCHAR,           /* \w */ | ||||||
|   OP_CUT,            /* The analogue of Prolog's "cut" operation (extension) */ |   OP_CUT,            /* The analogue of Prolog's "cut" operation (extension) */ | ||||||
|   OP_EOD,            /* End of data: or \Z. This must always be the last
 |   OP_EOD,            /* End of data: \Z. */ | ||||||
|                         of the backslashed meta values. */ |  | ||||||
| 
 | 
 | ||||||
|   OP_NOT_WORD_BOUNDARY_L,  /* localized \B */ |   OP_NOT_WORD_BOUNDARY_L,  /* localized \B */ | ||||||
|   OP_WORD_BOUNDARY_L,      /* localized \b */ |   OP_WORD_BOUNDARY_L,      /* localized \b */ | ||||||
|  | @ -55,14 +55,14 @@ extern void  (*pcre_free)(void *); | ||||||
| /* Functions */ | /* Functions */ | ||||||
| 
 | 
 | ||||||
| #ifdef FOR_PYTHON | #ifdef FOR_PYTHON | ||||||
| extern pcre *pcre_compile(const char *, int, char **, int *, PyObject *); | extern pcre *pcre_compile(const char *, int, const char **, int *, PyObject *); | ||||||
| #else | #else | ||||||
| extern pcre *pcre_compile(const char *, int, char **, int *); | extern pcre *pcre_compile(const char *, int, const char **, int *); | ||||||
| #endif | #endif | ||||||
| extern int pcre_exec(const pcre *, const pcre_extra *, const char *, | extern int pcre_exec(const pcre *, const pcre_extra *, const char *, | ||||||
|   int, int, int *, int); |   int, int, int *, int); | ||||||
| extern int pcre_info(const pcre *, int *, int *); | extern int pcre_info(const pcre *, int *, int *); | ||||||
| extern pcre_extra *pcre_study(const pcre *, int, char **); | extern pcre_extra *pcre_study(const pcre *, int, const char **); | ||||||
| extern char *pcre_version(void); | extern const char *pcre_version(void); | ||||||
| 
 | 
 | ||||||
| #endif /* End of pcre.h */ | #endif /* End of pcre.h */ | ||||||
|  |  | ||||||
|  | @ -1,5 +1,5 @@ | ||||||
| /***********************************************************
 | /***********************************************************
 | ||||||
| Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam, | Copyright 1997 by Stichting Mathematisch Centrum, Amsterdam, | ||||||
| The Netherlands. | The Netherlands. | ||||||
| 
 | 
 | ||||||
|                         All Rights Reserved |                         All Rights Reserved | ||||||
|  | @ -33,6 +33,7 @@ PERFORMANCE OF THIS SOFTWARE. | ||||||
| 
 | 
 | ||||||
| #include "Python.h" | #include "Python.h" | ||||||
| 
 | 
 | ||||||
|  | #include <assert.h> | ||||||
| #ifndef Py_eval_input | #ifndef Py_eval_input | ||||||
| /* For Python 1.4, graminit.h has to be explicitly included */ | /* For Python 1.4, graminit.h has to be explicitly included */ | ||||||
| #include "graminit.h" | #include "graminit.h" | ||||||
|  | @ -44,7 +45,7 @@ PERFORMANCE OF THIS SOFTWARE. | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
| #include "pcre.h" | #include "pcre.h" | ||||||
| #include "pcre-internal.h" | #include "pcre-int.h" | ||||||
| 
 | 
 | ||||||
| static PyObject *ErrorObject; | static PyObject *ErrorObject; | ||||||
| 
 | 
 | ||||||
|  | @ -127,7 +128,9 @@ PyPcre_exec(self, args) | ||||||
| 	if (count==PCRE_ERROR_NOMATCH) {Py_INCREF(Py_None); return Py_None;} | 	if (count==PCRE_ERROR_NOMATCH) {Py_INCREF(Py_None); return Py_None;} | ||||||
| 	if (count<0) | 	if (count<0) | ||||||
| 	{ | 	{ | ||||||
| 		PyErr_SetObject(ErrorObject, Py_BuildValue("si", "Regex error", count)); | 		PyObject *errval = Py_BuildValue("si", "Regex execution error", count); | ||||||
|  | 		PyErr_SetObject(ErrorObject, errval); | ||||||
|  | 		Py_XDECREF(errval); | ||||||
| 		return NULL; | 		return NULL; | ||||||
| 	} | 	} | ||||||
| 	 | 	 | ||||||
|  | @ -191,7 +194,7 @@ PyPcre_compile(self, args) | ||||||
| 	PcreObject *rv; | 	PcreObject *rv; | ||||||
| 	PyObject *dictionary; | 	PyObject *dictionary; | ||||||
| 	char *pattern, *newpattern; | 	char *pattern, *newpattern; | ||||||
| 	char *error; | 	const char *error; | ||||||
| 	int num_zeros, i, j; | 	int num_zeros, i, j; | ||||||
| 	 | 	 | ||||||
| 	int patternlen, options, erroroffset; | 	int patternlen, options, erroroffset; | ||||||
|  | @ -203,12 +206,13 @@ PyPcre_compile(self, args) | ||||||
| 		return NULL; | 		return NULL; | ||||||
| 
 | 
 | ||||||
| 	/* PCRE doesn't like having null bytes in its pattern, so we have to replace 
 | 	/* PCRE doesn't like having null bytes in its pattern, so we have to replace 
 | ||||||
| 	   any zeros in the string with the characters '\0'. */ | 	   any zeros in the string with the characters '\000'. This increases the size | ||||||
| 	num_zeros=1; | 	   of the string by 3*num_zeros, plus 1 byte for the terminating \0.  */ | ||||||
|  | 	num_zeros=1;      /* Start at 1; this will give 3 extra bytes of leeway */ | ||||||
| 	for(i=0; i<patternlen; i++) { | 	for(i=0; i<patternlen; i++) { | ||||||
| 		if (pattern[i]==0) num_zeros++; | 		if (pattern[i]==0) num_zeros++; | ||||||
| 	} | 	} | ||||||
| 	newpattern=malloc(patternlen+num_zeros); | 	newpattern=malloc(patternlen + num_zeros*3 + 4);  | ||||||
| 	if (newpattern==NULL) { | 	if (newpattern==NULL) { | ||||||
| 		PyErr_SetString(PyExc_MemoryError, "can't allocate memory for new pattern"); | 		PyErr_SetString(PyExc_MemoryError, "can't allocate memory for new pattern"); | ||||||
| 		return NULL; | 		return NULL; | ||||||
|  | @ -217,10 +221,16 @@ PyPcre_compile(self, args) | ||||||
| 	{ | 	{ | ||||||
| 		if (pattern[i]!=0) newpattern[j]=pattern[i]; | 		if (pattern[i]!=0) newpattern[j]=pattern[i]; | ||||||
| 		else { | 		else { | ||||||
| 			newpattern[j++]='\\'; | 			newpattern[j++] ='\\'; | ||||||
| 			newpattern[j]  ='0'; | 			newpattern[j++] = '0'; | ||||||
|  | 			newpattern[j++] = '0'; | ||||||
|  | 			newpattern[j  ] = '0'; | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  | 	/* Keep purify happy; for pcre, one null byte is enough! */ | ||||||
|  | 	newpattern[j++]='\0'; | ||||||
|  | 	newpattern[j++]='\0'; | ||||||
|  | 	newpattern[j++]='\0'; | ||||||
| 	newpattern[j]='\0'; | 	newpattern[j]='\0'; | ||||||
| 
 | 
 | ||||||
| 	rv->regex = pcre_compile((char*)newpattern, options,  | 	rv->regex = pcre_compile((char*)newpattern, options,  | ||||||
|  | @ -231,21 +241,27 @@ PyPcre_compile(self, args) | ||||||
| 		PyMem_DEL(rv); | 		PyMem_DEL(rv); | ||||||
| 		if (!PyErr_Occurred()) | 		if (!PyErr_Occurred()) | ||||||
| 		{ | 		{ | ||||||
| 			PyErr_SetObject(ErrorObject, Py_BuildValue("si", error, erroroffset)); | 			PyObject *errval = Py_BuildValue("si", error, erroroffset); | ||||||
|  | 			PyErr_SetObject(ErrorObject, errval); | ||||||
|  | 			Py_XDECREF(errval); | ||||||
| 		} | 		} | ||||||
| 		return NULL; | 		return NULL; | ||||||
| 	} | 	} | ||||||
| 	rv->regex_extra=pcre_study(rv->regex, 0, &error); | 	rv->regex_extra=pcre_study(rv->regex, 0, &error); | ||||||
| 	if (rv->regex_extra==NULL && error!=NULL)  | 	if (rv->regex_extra==NULL && error!=NULL)  | ||||||
| 	{ | 	{ | ||||||
|  | 		PyObject *errval = Py_BuildValue("si", error, 0); | ||||||
| 		PyMem_DEL(rv); | 		PyMem_DEL(rv); | ||||||
| 		PyErr_SetObject(ErrorObject, Py_BuildValue("si", error, 0)); | 		PyErr_SetObject(ErrorObject, errval); | ||||||
|  | 		Py_XDECREF(errval); | ||||||
| 		return NULL; | 		return NULL; | ||||||
| 	} | 	} | ||||||
|         rv->num_groups = pcre_info(rv->regex, NULL, NULL); |         rv->num_groups = pcre_info(rv->regex, NULL, NULL); | ||||||
| 	if (rv->num_groups<0)  | 	if (rv->num_groups<0)  | ||||||
| 	{ | 	{ | ||||||
| 		PyErr_SetObject(ErrorObject, Py_BuildValue("si", "Regex error", rv->num_groups)); | 		PyObject *errval = Py_BuildValue("si", error, rv->num_groups); | ||||||
|  | 		PyErr_SetObject(ErrorObject, errval); | ||||||
|  | 		Py_XDECREF(errval); | ||||||
| 		PyMem_DEL(rv); | 		PyMem_DEL(rv); | ||||||
| 		return NULL; | 		return NULL; | ||||||
| 	} | 	} | ||||||
|  | @ -526,7 +542,7 @@ PyPcre_expand(self, args) | ||||||
| 				Py_DECREF(r); Py_DECREF(tuple); | 				Py_DECREF(r); Py_DECREF(tuple); | ||||||
| 				if (result==NULL) | 				if (result==NULL) | ||||||
| 				{ | 				{ | ||||||
| 					/* The group() method trigged an exception of some sort */ | 					/* The group() method triggered an exception of some sort */ | ||||||
| 					Py_DECREF(results); | 					Py_DECREF(results); | ||||||
| 					Py_DECREF(value); | 					Py_DECREF(value); | ||||||
| 					return NULL; | 					return NULL; | ||||||
|  |  | ||||||
							
								
								
									
										267
									
								
								Modules/pypcre.c
									
										
									
									
									
								
							
							
						
						
									
										267
									
								
								Modules/pypcre.c
									
										
									
									
									
								
							|  | @ -15,10 +15,9 @@ file by hand, or submit patches to it. | ||||||
| The Python-specific PCRE distribution can be retrieved from | The Python-specific PCRE distribution can be retrieved from | ||||||
|        http://starship.skyport.net/crew/amk/regex/
 |        http://starship.skyport.net/crew/amk/regex/
 | ||||||
| 
 | 
 | ||||||
| The unmodified original PCRE distribution doesn't have a fixed URL | The unmodified original PCRE distribution is available at | ||||||
| yet; write Philip Hazel <ph10@cam.ac.uk> for the latest version. | ftp://ftp.cus.cam.ac.uk/pub/software/programs/pcre/, and is originally
 | ||||||
| 
 | written by: Philip Hazel <ph10@cam.ac.uk> | ||||||
| Written by:  Philip Hazel <ph10@cam.ac.uk> |  | ||||||
| 
 | 
 | ||||||
| Extensively modified by the Python String-SIG: <string-sig@python.org> | Extensively modified by the Python String-SIG: <string-sig@python.org> | ||||||
| Send bug reports to:                           <string-sig@python.org> | Send bug reports to:                           <string-sig@python.org> | ||||||
|  | @ -46,7 +45,7 @@ computer system, and to redistribute it freely, subject to the following | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| #define FOR_PYTHON | #define FOR_PYTHON | ||||||
| #include "pcre-internal.h" | #include "pcre-int.h" | ||||||
| #include "Python.h" | #include "Python.h" | ||||||
| #include "mymalloc.h" | #include "mymalloc.h" | ||||||
| #include <ctype.h> | #include <ctype.h> | ||||||
|  | @ -254,13 +253,13 @@ Returns:       TRUE if table built, FALSE otherwise | ||||||
| */ | */ | ||||||
| 
 | 
 | ||||||
| static BOOL | static BOOL | ||||||
| set_start_bits(uschar *code, uschar *start_bits) | set_start_bits(const uschar *code, uschar *start_bits) | ||||||
| { | { | ||||||
| register int c; | register int c; | ||||||
| 
 | 
 | ||||||
| do | do | ||||||
|   { |   { | ||||||
|   uschar *tcode = code + 3; |   const uschar *tcode = code + 3; | ||||||
|   BOOL try_next = TRUE; |   BOOL try_next = TRUE; | ||||||
| 
 | 
 | ||||||
|   while (try_next) |   while (try_next) | ||||||
|  | @ -466,12 +465,12 @@ Returns:    pointer to a pcre_extra block, | ||||||
| */ | */ | ||||||
| 
 | 
 | ||||||
| pcre_extra * | pcre_extra * | ||||||
| pcre_study(const pcre *external_re, int options, char **errorptr) | pcre_study(const pcre *external_re, int options, const char **errorptr) | ||||||
| { | { | ||||||
| BOOL caseless; | BOOL caseless; | ||||||
| uschar start_bits[32]; | uschar start_bits[32]; | ||||||
| real_pcre_extra *extra; | real_pcre_extra *extra; | ||||||
| real_pcre *re = (real_pcre *)external_re; | const real_pcre *re = (const real_pcre *)external_re; | ||||||
| 
 | 
 | ||||||
| *errorptr = NULL; | *errorptr = NULL; | ||||||
| 
 | 
 | ||||||
|  | @ -592,7 +591,8 @@ static char rep_max[] = { 0, 0, 0, 0, 1, 1 }; | ||||||
| /* Text forms of OP_ values and things, for debugging */ | /* Text forms of OP_ values and things, for debugging */ | ||||||
| 
 | 
 | ||||||
| #ifdef DEBUG | #ifdef DEBUG | ||||||
| static char *OP_names[] = { "End", "\\A", "\\B", "\\b", "\\D", "\\d", | static const char *OP_names[] = {  | ||||||
|  |   "End", "\\A", "\\B", "\\b", "\\D", "\\d", | ||||||
|   "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z",  |   "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z",  | ||||||
|   "localized \\B", "localized \\b", "localized \\W", "localized \\w", |   "localized \\B", "localized \\b", "localized \\W", "localized \\w", | ||||||
|   "^", "$", "Any", "chars", |   "^", "$", "Any", "chars", | ||||||
|  | @ -627,8 +627,8 @@ static short int escapes[] = { | ||||||
| 
 | 
 | ||||||
| /* Definition to allow mutual recursion */ | /* Definition to allow mutual recursion */ | ||||||
| 
 | 
 | ||||||
| static BOOL compile_regex(int, int *, uschar **, uschar **,  | static BOOL compile_regex(int, int *, uschar **, const uschar **,  | ||||||
| 			   char **, PyObject *); | 			   const char **, PyObject *); | ||||||
| 
 | 
 | ||||||
| /* Structure for passing "static" information around between the functions
 | /* Structure for passing "static" information around between the functions
 | ||||||
| doing the matching, so that they are thread-safe. */ | doing the matching, so that they are thread-safe. */ | ||||||
|  | @ -645,10 +645,10 @@ typedef struct match_data { | ||||||
|   BOOL   noteol;                /* NOTEOL flag */ |   BOOL   noteol;                /* NOTEOL flag */ | ||||||
|   BOOL   dotall;                /* Dot matches any char */ |   BOOL   dotall;                /* Dot matches any char */ | ||||||
|   BOOL   endonly;               /* Dollar not before final \n */ |   BOOL   endonly;               /* Dollar not before final \n */ | ||||||
|   uschar *start_subject;        /* Start of the subject string */ |   const uschar *start_subject;  /* Start of the subject string */ | ||||||
|   uschar *end_subject;          /* End of the subject string */ |   const uschar *end_subject;    /* End of the subject string */ | ||||||
|   jmp_buf fail_env;             /* Environment for longjump() break out */ |   jmp_buf fail_env;             /* Environment for longjump() break out */ | ||||||
|   uschar *end_match_ptr;        /* Subject position at end match */ |   const uschar *end_match_ptr;  /* Subject position at end match */ | ||||||
|   int     end_offset_top;       /* Highwater mark at end of match */ |   int     end_offset_top;       /* Highwater mark at end of match */ | ||||||
|   jmp_buf error_env;          /* For longjmp() if an error occurs deep inside a 
 |   jmp_buf error_env;          /* For longjmp() if an error occurs deep inside a 
 | ||||||
| 				   matching operation */ | 				   matching operation */ | ||||||
|  | @ -656,7 +656,7 @@ typedef struct match_data { | ||||||
|   int    point;                 /* Point to add next item pushed onto stacks */ |   int    point;                 /* Point to add next item pushed onto stacks */ | ||||||
|   /* Pointers to the 6 stacks */ |   /* Pointers to the 6 stacks */ | ||||||
|   int *off_num, *offset_top, *r1, *r2;  |   int *off_num, *offset_top, *r1, *r2;  | ||||||
|   uschar **eptr, **ecode;  |   const uschar **eptr, **ecode;  | ||||||
| } match_data; | } match_data; | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | @ -680,7 +680,7 @@ void  (*pcre_free)(void *) = free; | ||||||
| *          Return version string                 * | *          Return version string                 * | ||||||
| *************************************************/ | *************************************************/ | ||||||
| 
 | 
 | ||||||
| char * | const char * | ||||||
| pcre_version(void) | pcre_version(void) | ||||||
| { | { | ||||||
| return PCRE_VERSION; | return PCRE_VERSION; | ||||||
|  | @ -710,7 +710,7 @@ Returns:        number of identifying extraction brackets | ||||||
| int | int | ||||||
| pcre_info(const pcre *external_re, int *optptr, int *first_char) | pcre_info(const pcre *external_re, int *optptr, int *first_char) | ||||||
| { | { | ||||||
| real_pcre *re = (real_pcre *)external_re; | const real_pcre *re = (real_pcre *)external_re; | ||||||
| if (re == NULL) return PCRE_ERROR_NULL; | if (re == NULL) return PCRE_ERROR_NULL; | ||||||
| if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; | if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; | ||||||
| if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS); | if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS); | ||||||
|  | @ -906,9 +906,9 @@ return FALSE; | ||||||
| */ | */ | ||||||
|    |    | ||||||
| static int  | static int  | ||||||
| get_group_id(uschar *ptr, char finalchar, char **errorptr) | get_group_id(const uschar *ptr, char finalchar, const char **errorptr) | ||||||
| { | { | ||||||
|   uschar *start = ptr; |   const uschar *start = ptr; | ||||||
| 
 | 
 | ||||||
|   /* If the first character is not in \w, or is in \w but is a digit,
 |   /* If the first character is not in \w, or is in \w but is a digit,
 | ||||||
|      report an error */ |      report an error */ | ||||||
|  | @ -960,10 +960,10 @@ Returns:     zero or positive => a data character | ||||||
| */ | */ | ||||||
| 
 | 
 | ||||||
| static int | static int | ||||||
| check_escape(uschar **ptrptr, char **errorptr, int bracount, int options, | check_escape(const uschar **ptrptr, const char **errorptr, int bracount,  | ||||||
|   BOOL isclass) | 	     int options, BOOL isclass) | ||||||
| { | { | ||||||
| uschar *ptr = *ptrptr; | const uschar *ptr = *ptrptr; | ||||||
| int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */ | int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */ | ||||||
| int i; | int i; | ||||||
| 
 | 
 | ||||||
|  | @ -1092,7 +1092,7 @@ Returns:    TRUE or FALSE | ||||||
| */ | */ | ||||||
| 
 | 
 | ||||||
| static BOOL | static BOOL | ||||||
| is_counted_repeat(uschar *p) | is_counted_repeat(const uschar *p) | ||||||
| { | { | ||||||
| if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE; | if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE; | ||||||
| while ((pcre_ctypes[*p] & ctype_digit) != 0) p++; | while ((pcre_ctypes[*p] & ctype_digit) != 0) p++; | ||||||
|  | @ -1127,8 +1127,8 @@ Returns:     pointer to '}' on success; | ||||||
|              current ptr on error, with errorptr set |              current ptr on error, with errorptr set | ||||||
| */ | */ | ||||||
| 
 | 
 | ||||||
| static uschar * | static const uschar * | ||||||
| read_repeat_counts(uschar *p, int *minp, int *maxp, char **errorptr) | read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr) | ||||||
| { | { | ||||||
| int min = 0; | int min = 0; | ||||||
| int max = -1; | int max = -1; | ||||||
|  | @ -1183,16 +1183,16 @@ Returns:     TRUE on success | ||||||
| 
 | 
 | ||||||
| static BOOL | static BOOL | ||||||
| compile_branch(int options, int *brackets, uschar **codeptr, | compile_branch(int options, int *brackets, uschar **codeptr, | ||||||
| 	       uschar **ptrptr, char **errorptr, PyObject *dictionary) | 	       const uschar **ptrptr, const char **errorptr, PyObject *dictionary) | ||||||
| { | { | ||||||
| int repeat_type, op_type; | int repeat_type, op_type; | ||||||
| int repeat_min, repeat_max; | int repeat_min, repeat_max; | ||||||
| int bravalue, length; | int bravalue, length; | ||||||
| register int c; | register int c; | ||||||
| register uschar *code = *codeptr; | register uschar *code = *codeptr; | ||||||
| uschar *ptr = *ptrptr; | const uschar *ptr = *ptrptr; | ||||||
|  | const uschar *oldptr; | ||||||
| uschar *previous = NULL; | uschar *previous = NULL; | ||||||
| uschar *oldptr; |  | ||||||
| uschar class[32]; | uschar class[32]; | ||||||
| uschar *class_flag;  /* Pointer to the single-byte flag for OP_CLASS_L */ | uschar *class_flag;  /* Pointer to the single-byte flag for OP_CLASS_L */ | ||||||
| 
 | 
 | ||||||
|  | @ -1299,7 +1299,7 @@ for (;; ptr++) | ||||||
|       /* Backslash may introduce a single character, or it may introduce one
 |       /* Backslash may introduce a single character, or it may introduce one
 | ||||||
|       of the specials, which just set a flag. Escaped items are checked for |       of the specials, which just set a flag. Escaped items are checked for | ||||||
|       validity in the pre-compiling pass. The sequence \b is a special case. |       validity in the pre-compiling pass. The sequence \b is a special case. | ||||||
|       Inside a class (and only there) it is treated as backslash. Elsewhere |       Inside a class (and only there) it is treated as backspace. Elsewhere | ||||||
|       it marks a word boundary. Other escapes have preset maps ready to |       it marks a word boundary. Other escapes have preset maps ready to | ||||||
|       or into the one we are building. We assume they have more than one |       or into the one we are building. We assume they have more than one | ||||||
|       character in them, so set class_count bigger than one. */ |       character in them, so set class_count bigger than one. */ | ||||||
|  | @ -1314,22 +1314,12 @@ for (;; ptr++) | ||||||
|           switch (-c) |           switch (-c) | ||||||
|             { |             { | ||||||
|             case ESC_d: |             case ESC_d: | ||||||
| 	    if (options & PCRE_LOCALE) |  | ||||||
| 	      { |  | ||||||
| 		*class_flag |= 4; |  | ||||||
| 	      } |  | ||||||
| 	    else |  | ||||||
| 	      { | 	      { | ||||||
| 		for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_digit]; | 		for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_digit]; | ||||||
| 	      } | 	      } | ||||||
|             continue; |             continue; | ||||||
| 
 | 
 | ||||||
|             case ESC_D: |             case ESC_D: | ||||||
| 	    if (options & PCRE_LOCALE) |  | ||||||
| 	      { |  | ||||||
| 		*class_flag |= 8; |  | ||||||
| 	      } |  | ||||||
| 	    else |  | ||||||
| 	      { | 	      { | ||||||
| 		for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_digit]; | 		for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_digit]; | ||||||
| 	      } | 	      } | ||||||
|  | @ -1360,22 +1350,12 @@ for (;; ptr++) | ||||||
|             continue; |             continue; | ||||||
| 
 | 
 | ||||||
|             case ESC_s: |             case ESC_s: | ||||||
| 	    if (options & PCRE_LOCALE) |  | ||||||
| 	      { |  | ||||||
| 		*class_flag |= 32; |  | ||||||
| 	      } |  | ||||||
| 	    else |  | ||||||
| 	      { | 	      { | ||||||
| 		for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_space]; | 		for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_space]; | ||||||
| 	      } | 	      } | ||||||
|             continue; |             continue; | ||||||
| 
 | 
 | ||||||
|             case ESC_S: |             case ESC_S: | ||||||
| 	    if (options & PCRE_LOCALE) |  | ||||||
| 	      { |  | ||||||
| 		*class_flag |= 32; |  | ||||||
| 	      } |  | ||||||
| 	    else |  | ||||||
| 	      { | 	      { | ||||||
| 		for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_space]; | 		for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_space]; | ||||||
| 	      } | 	      } | ||||||
|  | @ -1795,7 +1775,7 @@ for (;; ptr++) | ||||||
| 	      } | 	      } | ||||||
| 	      string = PyString_FromStringAndSize((char*)ptr, idlen); | 	      string = PyString_FromStringAndSize((char*)ptr, idlen); | ||||||
| 	      intobj = PyInt_FromLong( brackets[0] + 1 ); | 	      intobj = PyInt_FromLong( brackets[0] + 1 ); | ||||||
| 	      if (intobj == NULL || string==NULL) | 	      if (intobj == NULL || string == NULL) | ||||||
| 		{ | 		{ | ||||||
| 		  Py_XDECREF(string); | 		  Py_XDECREF(string); | ||||||
| 		  Py_XDECREF(intobj); | 		  Py_XDECREF(intobj); | ||||||
|  | @ -1803,7 +1783,7 @@ for (;; ptr++) | ||||||
| 		  goto FAILED; | 		  goto FAILED; | ||||||
| 		} | 		} | ||||||
| 	      PyDict_SetItem(dictionary, string, intobj); | 	      PyDict_SetItem(dictionary, string, intobj); | ||||||
| 	      Py_DECREF(string); Py_DECREF(intobj); | 	      Py_DECREF(string); Py_DECREF(intobj); /* XXX DECREF commented out! */ | ||||||
| 	      ptr += idlen+1;  /* Point to rest of expression */ | 	      ptr += idlen+1;  /* Point to rest of expression */ | ||||||
| 	      goto do_grouping_bracket; | 	      goto do_grouping_bracket; | ||||||
| 	    } | 	    } | ||||||
|  | @ -1820,7 +1800,6 @@ for (;; ptr++) | ||||||
| 	      } | 	      } | ||||||
| 	      string = PyString_FromStringAndSize((char *)ptr, idlen); | 	      string = PyString_FromStringAndSize((char *)ptr, idlen); | ||||||
| 	      if (string==NULL)	{ | 	      if (string==NULL)	{ | ||||||
| 		  Py_XDECREF(string); |  | ||||||
| 		  *errorptr = "exception raised"; | 		  *errorptr = "exception raised"; | ||||||
| 		  goto FAILED; | 		  goto FAILED; | ||||||
| 		} | 		} | ||||||
|  | @ -1833,6 +1812,10 @@ for (;; ptr++) | ||||||
| 
 | 
 | ||||||
| 	      refnum = PyInt_AsLong(intobj); | 	      refnum = PyInt_AsLong(intobj); | ||||||
| 	      Py_DECREF(string);  | 	      Py_DECREF(string);  | ||||||
|  | 	      /* The caller doesn't own the reference to the value
 | ||||||
|  | 		 returned from PyDict_GetItem, so intobj is not | ||||||
|  | 		 DECREF'ed. */ | ||||||
|  | 
 | ||||||
| 	      *code++ = OP_REF; | 	      *code++ = OP_REF; | ||||||
| 	      *code++ = refnum; | 	      *code++ = refnum; | ||||||
| 	      /* The continue will cause the top-level for() loop to
 | 	      /* The continue will cause the top-level for() loop to
 | ||||||
|  | @ -1943,7 +1926,7 @@ for (;; ptr++) | ||||||
|       continue; |       continue; | ||||||
|       } |       } | ||||||
| 
 | 
 | ||||||
|     /* Reset and fall through */ |     /* Data character: Reset and fall through */ | ||||||
| 
 | 
 | ||||||
|     ptr = oldptr; |     ptr = oldptr; | ||||||
|     c = '\\'; |     c = '\\'; | ||||||
|  | @ -2035,9 +2018,9 @@ Returns:    TRUE on success | ||||||
| 
 | 
 | ||||||
| static BOOL | static BOOL | ||||||
| compile_regex(int options, int *brackets, uschar **codeptr, | compile_regex(int options, int *brackets, uschar **codeptr, | ||||||
|   uschar **ptrptr, char **errorptr, PyObject *dictionary) |   const uschar **ptrptr, const char **errorptr, PyObject *dictionary) | ||||||
| { | { | ||||||
| uschar *ptr = *ptrptr; | const uschar *ptr = *ptrptr; | ||||||
| uschar *code = *codeptr; | uschar *code = *codeptr; | ||||||
| uschar *start_bracket = code; | uschar *start_bracket = code; | ||||||
| 
 | 
 | ||||||
|  | @ -2103,7 +2086,7 @@ Returns:   TRUE or FALSE | ||||||
| */ | */ | ||||||
| 
 | 
 | ||||||
| static BOOL | static BOOL | ||||||
| is_anchored(register uschar *code, BOOL multiline) | is_anchored(register const uschar *code, BOOL multiline) | ||||||
| { | { | ||||||
| do { | do { | ||||||
|    int op = (int)code[3]; |    int op = (int)code[3]; | ||||||
|  | @ -2132,7 +2115,7 @@ Returns:   TRUE or FALSE | ||||||
| */ | */ | ||||||
| 
 | 
 | ||||||
| static BOOL | static BOOL | ||||||
| is_startline(uschar *code) | is_startline(const uschar *code) | ||||||
| { | { | ||||||
| do { | do { | ||||||
|    if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT) |    if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT) | ||||||
|  | @ -2217,7 +2200,7 @@ Returns:       pointer to compiled data block, or NULL on error, | ||||||
| */ | */ | ||||||
| 
 | 
 | ||||||
| pcre * | pcre * | ||||||
| pcre_compile(const char *pattern, int options, char **errorptr,  | pcre_compile(const char *pattern, int options, const char **errorptr,  | ||||||
| 	     int *erroroffset, PyObject *dictionary) | 	     int *erroroffset, PyObject *dictionary) | ||||||
| { | { | ||||||
| real_pcre *re; | real_pcre *re; | ||||||
|  | @ -2227,9 +2210,10 @@ int runlength; | ||||||
| int c, size; | int c, size; | ||||||
| int bracount = 0; | int bracount = 0; | ||||||
| int brastack[200]; | int brastack[200]; | ||||||
| int brastackptr = 0; |  | ||||||
| int top_backref = 0; | int top_backref = 0; | ||||||
| uschar *code, *ptr; | unsigned int brastackptr = 0; | ||||||
|  | uschar *code; | ||||||
|  | const uschar *ptr; | ||||||
| 
 | 
 | ||||||
| #ifdef DEBUG | #ifdef DEBUG | ||||||
| uschar *code_base, *code_end; | uschar *code_base, *code_end; | ||||||
|  | @ -2268,7 +2252,7 @@ internal flag settings. Make an attempt to correct for any counted white space | ||||||
| if an "extended" flag setting appears late in the pattern. We can't be so | if an "extended" flag setting appears late in the pattern. We can't be so | ||||||
| clever for #-comments. */ | clever for #-comments. */ | ||||||
| 
 | 
 | ||||||
| ptr = (uschar *)(pattern - 1); | ptr = (const uschar *)(pattern - 1); | ||||||
| while ((c = *(++ptr)) != 0) | while ((c = *(++ptr)) != 0) | ||||||
|   { |   { | ||||||
|   int min, max; |   int min, max; | ||||||
|  | @ -2295,7 +2279,7 @@ while ((c = *(++ptr)) != 0) | ||||||
| 
 | 
 | ||||||
|     case '\\': |     case '\\': | ||||||
|       { |       { | ||||||
|       uschar *save_ptr = ptr; |       const uschar *save_ptr = ptr; | ||||||
|       c = check_escape(&ptr, errorptr, bracount, options, FALSE); |       c = check_escape(&ptr, errorptr, bracount, options, FALSE); | ||||||
|       if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |       if (*errorptr != NULL) goto PCRE_ERROR_RETURN; | ||||||
|       if (c >= 0) |       if (c >= 0) | ||||||
|  | @ -2585,7 +2569,7 @@ while ((c = *(++ptr)) != 0) | ||||||
| 
 | 
 | ||||||
|       if (c == '\\') |       if (c == '\\') | ||||||
|         { |         { | ||||||
|         uschar *saveptr = ptr; |         const uschar *saveptr = ptr; | ||||||
|         c = check_escape(&ptr, errorptr, bracount, options, FALSE); |         c = check_escape(&ptr, errorptr, bracount, options, FALSE); | ||||||
|         if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |         if (*errorptr != NULL) goto PCRE_ERROR_RETURN; | ||||||
|         if (c < 0) { ptr = saveptr; break; } |         if (c < 0) { ptr = saveptr; break; } | ||||||
|  | @ -2633,7 +2617,7 @@ re->options = options; | ||||||
| error, *errorptr will be set non-NULL, so we don't need to look at the result | error, *errorptr will be set non-NULL, so we don't need to look at the result | ||||||
| of the function here. */ | of the function here. */ | ||||||
| 
 | 
 | ||||||
| ptr = (uschar *)pattern; | ptr = (const uschar *)pattern; | ||||||
| code = re->code; | code = re->code; | ||||||
| *code = OP_BRA; | *code = OP_BRA; | ||||||
| bracount = 0; | bracount = 0; | ||||||
|  | @ -2661,7 +2645,7 @@ if (*errorptr != NULL) | ||||||
|   { |   { | ||||||
|   (pcre_free)(re); |   (pcre_free)(re); | ||||||
|   PCRE_ERROR_RETURN: |   PCRE_ERROR_RETURN: | ||||||
|   *erroroffset = ptr - (uschar *)pattern; |   *erroroffset = ptr - (const uschar *)pattern; | ||||||
|   return NULL; |   return NULL; | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|  | @ -2947,8 +2931,8 @@ switch(type) | ||||||
|   case OP_WHITESPACE:     return (pcre_ctypes[c] & ctype_space) != 0; |   case OP_WHITESPACE:     return (pcre_ctypes[c] & ctype_space) != 0; | ||||||
|   case OP_NOT_WORDCHAR:   return (pcre_ctypes[c] & ctype_word) == 0; |   case OP_NOT_WORDCHAR:   return (pcre_ctypes[c] & ctype_word) == 0; | ||||||
|   case OP_WORDCHAR:       return (pcre_ctypes[c] & ctype_word) != 0; |   case OP_WORDCHAR:       return (pcre_ctypes[c] & ctype_word) != 0; | ||||||
|   case OP_NOT_WORDCHAR_L: return (c!='_' && !isalpha(c)); |   case OP_NOT_WORDCHAR_L: return (c!='_' && !isalnum(c)); | ||||||
|   case OP_WORDCHAR_L:     return (c=='_' || isalpha(c)); |   case OP_WORDCHAR_L:     return (c=='_' || isalnum(c)); | ||||||
|   } |   } | ||||||
| return FALSE; | return FALSE; | ||||||
| } | } | ||||||
|  | @ -2971,9 +2955,9 @@ Returns:      TRUE if matched | ||||||
| */ | */ | ||||||
| 
 | 
 | ||||||
| static BOOL | static BOOL | ||||||
| match_ref(int number, register uschar *eptr, int length, match_data *md) | match_ref(int number, register const uschar *eptr, int length, match_data *md) | ||||||
| { | { | ||||||
| uschar *p = md->start_subject + md->offset_vector[number]; | const uschar *p = md->start_subject + md->offset_vector[number]; | ||||||
| 
 | 
 | ||||||
| #ifdef DEBUG | #ifdef DEBUG | ||||||
| if (eptr >= md->end_subject) | if (eptr >= md->end_subject) | ||||||
|  | @ -2992,7 +2976,7 @@ printf("\n"); | ||||||
| 
 | 
 | ||||||
| if (length > md->end_subject - p) return FALSE; | if (length > md->end_subject - p) return FALSE; | ||||||
| 
 | 
 | ||||||
| /* Separate the caselesss case for speed */ | /* Separate the caseless case for speed */ | ||||||
| 
 | 
 | ||||||
| if (md->caseless) | if (md->caseless) | ||||||
|   { while (length-- > 0) if (pcre_lcc[*p++] != pcre_lcc[*eptr++]) return FALSE; } |   { while (length-- > 0) if (pcre_lcc[*p++] != pcre_lcc[*eptr++]) return FALSE; } | ||||||
|  | @ -3027,8 +3011,8 @@ static int grow_stack(match_data *md) | ||||||
|       else {md->length = 80;} |       else {md->length = 80;} | ||||||
|     } |     } | ||||||
|   PyMem_RESIZE(md->offset_top, int, md->length); |   PyMem_RESIZE(md->offset_top, int, md->length); | ||||||
|   PyMem_RESIZE(md->eptr, uschar *, md->length); |   PyMem_RESIZE(md->eptr, const uschar *, md->length); | ||||||
|   PyMem_RESIZE(md->ecode, uschar *, md->length); |   PyMem_RESIZE(md->ecode, const uschar *, md->length); | ||||||
|   PyMem_RESIZE(md->off_num, int, md->length); |   PyMem_RESIZE(md->off_num, int, md->length); | ||||||
|   PyMem_RESIZE(md->r1, int, md->length); |   PyMem_RESIZE(md->r1, int, md->length); | ||||||
|   PyMem_RESIZE(md->r2, int, md->length); |   PyMem_RESIZE(md->r2, int, md->length); | ||||||
|  | @ -3058,7 +3042,7 @@ Returns:       TRUE if matched | ||||||
| */ | */ | ||||||
| 
 | 
 | ||||||
| static BOOL | static BOOL | ||||||
| match(register uschar *eptr, register uschar *ecode, int offset_top, | match(register const uschar *eptr, register const uschar *ecode, int offset_top, | ||||||
|   match_data *md) |   match_data *md) | ||||||
| { | { | ||||||
|   int save_stack_position = md->point; |   int save_stack_position = md->point; | ||||||
|  | @ -3072,7 +3056,7 @@ for (;;) | ||||||
|   int min, max, ctype; |   int min, max, ctype; | ||||||
|   register int i; |   register int i; | ||||||
|   register int c; |   register int c; | ||||||
|   BOOL minimize; |   BOOL minimize = FALSE; | ||||||
| 
 | 
 | ||||||
|   /* Opening bracket. Check the alternative branches in turn, failing if none
 |   /* Opening bracket. Check the alternative branches in turn, failing if none
 | ||||||
|   match. We have to set the start offset if required and there is space |   match. We have to set the start offset if required and there is space | ||||||
|  | @ -3085,7 +3069,7 @@ for (;;) | ||||||
|   if ((int)*ecode >= OP_BRA) |   if ((int)*ecode >= OP_BRA) | ||||||
|     { |     { | ||||||
|     int number = (*ecode - OP_BRA) << 1; |     int number = (*ecode - OP_BRA) << 1; | ||||||
|     int save_offset1, save_offset2; |     int save_offset1 = 0, save_offset2 = 0; | ||||||
| 
 | 
 | ||||||
| #ifdef DEBUG | #ifdef DEBUG | ||||||
|     printf("start bracket %d\n", number/2); |     printf("start bracket %d\n", number/2); | ||||||
|  | @ -3212,7 +3196,7 @@ for (;;) | ||||||
| 
 | 
 | ||||||
|     case OP_BRAZERO: |     case OP_BRAZERO: | ||||||
|       { |       { | ||||||
|       uschar *next = ecode+1; |       const uschar *next = ecode+1; | ||||||
|       if (match(eptr, next, offset_top, md)) SUCCEED; |       if (match(eptr, next, offset_top, md)) SUCCEED; | ||||||
|       do next += (next[1] << 8) + next[2]; while (*next == OP_ALT); |       do next += (next[1] << 8) + next[2]; while (*next == OP_ALT); | ||||||
|       ecode = next + 3; |       ecode = next + 3; | ||||||
|  | @ -3221,7 +3205,7 @@ for (;;) | ||||||
| 
 | 
 | ||||||
|     case OP_BRAMINZERO: |     case OP_BRAMINZERO: | ||||||
|       { |       { | ||||||
|       uschar *next = ecode+1; |       const uschar *next = ecode+1; | ||||||
|       do next += (next[1] << 8) + next[2]; while (*next == OP_ALT); |       do next += (next[1] << 8) + next[2]; while (*next == OP_ALT); | ||||||
|       if (match(eptr, next+3, offset_top, md)) SUCCEED; |       if (match(eptr, next+3, offset_top, md)) SUCCEED; | ||||||
|       ecode++; |       ecode++; | ||||||
|  | @ -3237,7 +3221,7 @@ for (;;) | ||||||
|     case OP_KETRMAX: |     case OP_KETRMAX: | ||||||
|       { |       { | ||||||
|       int number; |       int number; | ||||||
|       uschar *prev = ecode - (ecode[1] << 8) - ecode[2]; |       const uschar *prev = ecode - (ecode[1] << 8) - ecode[2]; | ||||||
| 
 | 
 | ||||||
|       if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE) |       if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE) | ||||||
|         { |         { | ||||||
|  | @ -3279,11 +3263,10 @@ for (;;) | ||||||
| 
 | 
 | ||||||
|       if (*ecode == OP_KETRMIN) |       if (*ecode == OP_KETRMIN) | ||||||
|         { |         { | ||||||
| 	uschar *ptr; | 	const uschar *ptr; | ||||||
| 	if (match(eptr, ecode+3, offset_top, md)) goto succeed; | 	if (match(eptr, ecode+3, offset_top, md)) goto succeed; | ||||||
| 	/* Handle alternation inside the BRA...KET; push the additional
 | 	/* Handle alternation inside the BRA...KET; push the additional
 | ||||||
| 	   alternatives onto the stack  | 	   alternatives onto the stack */ | ||||||
| 	   XXX this tries the alternatives backwards! */ |  | ||||||
| 	ptr=prev; | 	ptr=prev; | ||||||
| 	do { | 	do { | ||||||
| 	  ptr += (ptr[1]<<8)+ ptr[2]; | 	  ptr += (ptr[1]<<8)+ ptr[2]; | ||||||
|  | @ -3306,8 +3289,8 @@ for (;;) | ||||||
|         } |         } | ||||||
|       else  /* OP_KETRMAX */ |       else  /* OP_KETRMAX */ | ||||||
|         { |         { | ||||||
| 	uschar *ptr; | 	const uschar *ptr; | ||||||
| 	int points_pushed=0; | 	/*int points_pushed=0;*/ | ||||||
| 
 | 
 | ||||||
| 	/* Push one failure point, that will resume matching at the code after 
 | 	/* Push one failure point, that will resume matching at the code after 
 | ||||||
| 	   the KETRMAX opcode. */ | 	   the KETRMAX opcode. */ | ||||||
|  | @ -3325,8 +3308,7 @@ for (;;) | ||||||
| 
 | 
 | ||||||
| 	md->offset_vector[number] = eptr - md->start_subject; | 	md->offset_vector[number] = eptr - md->start_subject; | ||||||
| 	/* Handle alternation inside the BRA...KET; push each of the
 | 	/* Handle alternation inside the BRA...KET; push each of the
 | ||||||
| 	   additional alternatives onto the stack  | 	   additional alternatives onto the stack */ | ||||||
| 	   XXX this tries the alternatives backwards! */ |  | ||||||
| 	ptr=prev; | 	ptr=prev; | ||||||
| 	do { | 	do { | ||||||
| 	  ptr += (ptr[1]<<8)+ ptr[2]; | 	  ptr += (ptr[1]<<8)+ ptr[2]; | ||||||
|  | @ -3344,15 +3326,15 @@ for (;;) | ||||||
| 	      md->r2[md->point]         = 0;  | 	      md->r2[md->point]         = 0;  | ||||||
| 	      md->off_num[md->point]    = 0;  | 	      md->off_num[md->point]    = 0;  | ||||||
| 	      md->point++;	       | 	      md->point++;	       | ||||||
| 	      points_pushed++; | 	      /*points_pushed++;*/ | ||||||
| 	    } | 	    } | ||||||
| 	} while (*ptr==OP_ALT); | 	} while (*ptr==OP_ALT); | ||||||
| 	/* Jump to the first (or only) alternative and resume trying to match */ | 	/* Jump to the first (or only) alternative and resume trying to match */ | ||||||
| 	ecode=prev+3; goto match_loop; | 	ecode=prev+3; goto match_loop; | ||||||
|         } |         } | ||||||
|       } |       } | ||||||
|     FAIL; |     break; | ||||||
| 
 |      | ||||||
|     /* Start of subject unless notbol, or after internal newline if multiline */ |     /* Start of subject unless notbol, or after internal newline if multiline */ | ||||||
| 
 | 
 | ||||||
|     case OP_CIRC: |     case OP_CIRC: | ||||||
|  | @ -3419,9 +3401,9 @@ for (;;) | ||||||
|     case OP_WORD_BOUNDARY_L: |     case OP_WORD_BOUNDARY_L: | ||||||
|       { |       { | ||||||
| 	BOOL prev_is_word = (eptr != md->start_subject) && | 	BOOL prev_is_word = (eptr != md->start_subject) && | ||||||
| 	  (isalpha(eptr[-1]) || eptr[-1]=='_'); | 	  (isalnum(eptr[-1]) || eptr[-1]=='_'); | ||||||
| 	BOOL cur_is_word = (eptr < md->end_subject) && | 	BOOL cur_is_word = (eptr < md->end_subject) && | ||||||
| 	  (isalpha(eptr[-1]) || eptr[-1]=='_'); | 	  (isalnum(*eptr) || *eptr=='_'); | ||||||
| 	if ((*ecode++ == OP_WORD_BOUNDARY_L)? | 	if ((*ecode++ == OP_WORD_BOUNDARY_L)? | ||||||
| 	    cur_is_word == prev_is_word : cur_is_word != prev_is_word) | 	    cur_is_word == prev_is_word : cur_is_word != prev_is_word) | ||||||
| 	  FAIL; | 	  FAIL; | ||||||
|  | @ -3474,14 +3456,14 @@ for (;;) | ||||||
|     break; |     break; | ||||||
| 
 | 
 | ||||||
|     case OP_NOT_WORDCHAR_L: |     case OP_NOT_WORDCHAR_L: | ||||||
|     if (eptr >= md->end_subject || (*eptr=='_' || isalpha(*eptr) )) |     if (eptr >= md->end_subject || (*eptr=='_' || isalnum(*eptr) )) | ||||||
|       return FALSE; |       return FALSE; | ||||||
|     eptr++; |     eptr++; | ||||||
|     ecode++; |     ecode++; | ||||||
|     break; |     break; | ||||||
| 
 | 
 | ||||||
|     case OP_WORDCHAR_L: |     case OP_WORDCHAR_L: | ||||||
|     if (eptr >= md->end_subject || (*eptr!='_' && !isalpha(*eptr) )) |     if (eptr >= md->end_subject || (*eptr!='_' && !isalnum(*eptr) )) | ||||||
|       return FALSE; |       return FALSE; | ||||||
|     eptr++; |     eptr++; | ||||||
|     ecode++; |     ecode++; | ||||||
|  | @ -3577,7 +3559,7 @@ for (;;) | ||||||
| 
 | 
 | ||||||
|       else |       else | ||||||
|         { |         { | ||||||
|         uschar *pp = eptr; |         const uschar *pp = eptr; | ||||||
|         for (i = min; i < max; i++) |         for (i = min; i < max; i++) | ||||||
|           { |           { | ||||||
|           if (!match_ref(number, eptr, length, md)) break; |           if (!match_ref(number, eptr, length, md)) break; | ||||||
|  | @ -3601,8 +3583,8 @@ for (;;) | ||||||
| 
 | 
 | ||||||
|     case OP_CLASS: |     case OP_CLASS: | ||||||
|       { |       { | ||||||
|       uschar *data = ecode + 1;  /* Save for matching */ |       const uschar *data = ecode + 1;  /* Save for matching */ | ||||||
|       ecode += 33;               /* Advance past the item */ |       ecode += 33;                     /* Advance past the item */ | ||||||
| 
 | 
 | ||||||
|       switch (*ecode) |       switch (*ecode) | ||||||
|         { |         { | ||||||
|  | @ -3685,7 +3667,7 @@ for (;;) | ||||||
| 
 | 
 | ||||||
|       else |       else | ||||||
|         { |         { | ||||||
|         uschar *pp = eptr; |         const uschar *pp = eptr; | ||||||
|         for (i = min; i < max; eptr++, i++) |         for (i = min; i < max; eptr++, i++) | ||||||
|           { |           { | ||||||
|           if (eptr >= md->end_subject) break; |           if (eptr >= md->end_subject) break; | ||||||
|  | @ -3710,8 +3692,8 @@ for (;;) | ||||||
| 
 | 
 | ||||||
|    case OP_CLASS_L: |    case OP_CLASS_L: | ||||||
|      { |      { | ||||||
|      uschar *data = ecode + 1;  /* Save for matching */ |       const uschar *data = ecode + 1;  /* Save for matching */ | ||||||
|       uschar locale_flag = *data; |       const uschar locale_flag = *data; | ||||||
|       ecode++; data++;		/* The localization support adds an extra byte */ |       ecode++; data++;		/* The localization support adds an extra byte */ | ||||||
| 
 | 
 | ||||||
|       ecode += 33;               /* Advance past the item */ |       ecode += 33;               /* Advance past the item */ | ||||||
|  | @ -3744,8 +3726,8 @@ for (;;) | ||||||
|         if (eptr >= md->end_subject) FAIL; |         if (eptr >= md->end_subject) FAIL; | ||||||
|         c = *eptr++; |         c = *eptr++; | ||||||
|         if ((data[c/8] & (1 << (c&7))) != 0) continue;    /* With main loop */ |         if ((data[c/8] & (1 << (c&7))) != 0) continue;    /* With main loop */ | ||||||
| 	if ( (locale_flag &  1) && (isalpha(c) || c=='_') ) continue;   /* Locale \w */ | 	if ( (locale_flag &  1) && (isalnum(c) || c=='_') ) continue;   /* Locale \w */ | ||||||
| 	if ( (locale_flag &  2) && (!isalpha(c) && c!='_') ) continue;   /* Locale \W */ | 	if ( (locale_flag &  2) && (!isalnum(c) && c!='_') ) continue;   /* Locale \W */ | ||||||
| #if 0 | #if 0 | ||||||
| 	if ( (locale_flag &  4) && isdigit(c) ) continue;    /* Locale \d */ | 	if ( (locale_flag &  4) && isdigit(c) ) continue;    /* Locale \d */ | ||||||
| 	if ( (locale_flag &  8) && !isdigit(c) ) continue;   /* Locale \D */ | 	if ( (locale_flag &  8) && !isdigit(c) ) continue;   /* Locale \D */ | ||||||
|  | @ -3758,8 +3740,8 @@ for (;;) | ||||||
|           c = pcre_fcc[c]; |           c = pcre_fcc[c]; | ||||||
|           if ((data[c/8] & (1 << (c&7))) != 0) continue;  /* With main loop */ |           if ((data[c/8] & (1 << (c&7))) != 0) continue;  /* With main loop */ | ||||||
| 
 | 
 | ||||||
| 	  if ( (locale_flag &  1) && (isalpha(c) || c=='_') ) continue;   /* Locale \w */ | 	  if ( (locale_flag &  1) && (isalnum(c) || c=='_') ) continue;   /* Locale \w */ | ||||||
| 	  if ( (locale_flag &  2) && (!isalpha(c) && c!='_') ) continue;   /* Locale \W */ | 	  if ( (locale_flag &  2) && (!isalnum(c) && c!='_') ) continue;   /* Locale \W */ | ||||||
|           } |           } | ||||||
|         FAIL; |         FAIL; | ||||||
|         } |         } | ||||||
|  | @ -3771,15 +3753,15 @@ for (;;) | ||||||
|         if (eptr >= md->end_subject) FAIL; |         if (eptr >= md->end_subject) FAIL; | ||||||
|         c = *eptr++; |         c = *eptr++; | ||||||
|         if ((data[c/8] & (1 << (c&7))) != 0) continue; |         if ((data[c/8] & (1 << (c&7))) != 0) continue; | ||||||
| 	if ( (locale_flag &  1) && (isalpha(c) || c=='_') ) continue;   /* Locale \w */ | 	if ( (locale_flag &  1) && (isalnum(c) || c=='_') ) continue;   /* Locale \w */ | ||||||
| 	if ( (locale_flag &  2) && (!isalpha(c) && c!='_') ) continue;   /* Locale \W */ | 	if ( (locale_flag &  2) && (!isalnum(c) && c!='_') ) continue;   /* Locale \W */ | ||||||
| 
 | 
 | ||||||
|         if (md->runtime_caseless) |         if (md->runtime_caseless) | ||||||
|           { |           { | ||||||
|           c = pcre_fcc[c]; |           c = pcre_fcc[c]; | ||||||
|           if ((data[c/8] & (1 << (c&7))) != 0) continue; |           if ((data[c/8] & (1 << (c&7))) != 0) continue; | ||||||
| 	  if ( (locale_flag &  1) && (isalpha(c) || c=='_') ) continue;   /* Locale \w */ | 	  if ( (locale_flag &  1) && (isalnum(c) || c=='_') ) continue;   /* Locale \w */ | ||||||
| 	  if ( (locale_flag &  2) && (!isalpha(c) && c!='_') ) continue;   /* Locale \W */ | 	  if ( (locale_flag &  2) && (!isalnum(c) && c!='_') ) continue;   /* Locale \W */ | ||||||
|           } |           } | ||||||
|         FAIL; |         FAIL; | ||||||
|         } |         } | ||||||
|  | @ -3800,15 +3782,15 @@ for (;;) | ||||||
|           if (i >= max || eptr >= md->end_subject) FAIL; |           if (i >= max || eptr >= md->end_subject) FAIL; | ||||||
|           c = *eptr++; |           c = *eptr++; | ||||||
|           if ((data[c/8] & (1 << (c&7))) != 0) continue; |           if ((data[c/8] & (1 << (c&7))) != 0) continue; | ||||||
| 	  if ( (locale_flag &  1) && (isalpha(c) || c=='_') ) continue;   /* Locale \w */ | 	  if ( (locale_flag &  1) && (isalnum(c) || c=='_') ) continue;   /* Locale \w */ | ||||||
| 	  if ( (locale_flag &  2) && (!isalpha(c) && c!='_') ) continue;   /* Locale \W */ | 	  if ( (locale_flag &  2) && (!isalnum(c) && c!='_') ) continue;   /* Locale \W */ | ||||||
| 
 | 
 | ||||||
|           if (md->runtime_caseless) |           if (md->runtime_caseless) | ||||||
|             { |             { | ||||||
|             c = pcre_fcc[c]; |             c = pcre_fcc[c]; | ||||||
|             if ((data[c/8] & (1 << (c&7))) != 0) continue; |             if ((data[c/8] & (1 << (c&7))) != 0) continue; | ||||||
| 	    if ( (locale_flag &  1) && (isalpha(c) || c=='_') ) continue;   /* Locale \w */ | 	    if ( (locale_flag &  1) && (isalnum(c) || c=='_') ) continue;   /* Locale \w */ | ||||||
| 	    if ( (locale_flag &  2) && (!isalpha(c) && c!='_') ) continue;   /* Locale \W */ | 	    if ( (locale_flag &  2) && (!isalnum(c) && c!='_') ) continue;   /* Locale \W */ | ||||||
|             } |             } | ||||||
|           FAIL; |           FAIL; | ||||||
|           } |           } | ||||||
|  | @ -3819,20 +3801,20 @@ for (;;) | ||||||
| 
 | 
 | ||||||
|       else |       else | ||||||
|         { |         { | ||||||
|         uschar *pp = eptr; |         const uschar *pp = eptr; | ||||||
|         for (i = min; i < max; eptr++, i++) |         for (i = min; i < max; eptr++, i++) | ||||||
|           { |           { | ||||||
|           if (eptr >= md->end_subject) break; |           if (eptr >= md->end_subject) break; | ||||||
|           c = *eptr; |           c = *eptr; | ||||||
|           if ((data[c/8] & (1 << (c&7))) != 0) continue; |           if ((data[c/8] & (1 << (c&7))) != 0) continue; | ||||||
| 	  if ( (locale_flag &  1) && (isalpha(c) || c=='_') ) continue;   /* Locale \w */ | 	  if ( (locale_flag &  1) && (isalnum(c) || c=='_') ) continue;   /* Locale \w */ | ||||||
| 	  if ( (locale_flag &  2) && (!isalpha(c) && c!='_') ) continue;   /* Locale \W */ | 	  if ( (locale_flag &  2) && (!isalnum(c) && c!='_') ) continue;   /* Locale \W */ | ||||||
|           if (md->runtime_caseless) |           if (md->runtime_caseless) | ||||||
|             { |             { | ||||||
|             c = pcre_fcc[c]; |             c = pcre_fcc[c]; | ||||||
|             if ((data[c/8] & (1 << (c&7))) != 0) continue; |             if ((data[c/8] & (1 << (c&7))) != 0) continue; | ||||||
| 	    if ( (locale_flag &  1) && (isalpha(c) || c=='_') ) continue;   /* Locale \w */ | 	    if ( (locale_flag &  1) && (isalnum(c) || c=='_') ) continue;   /* Locale \w */ | ||||||
| 	    if ( (locale_flag &  2) && (!isalpha(c) && c!='_') ) continue;   /* Locale \W */ | 	    if ( (locale_flag &  2) && (!isalnum(c) && c!='_') ) continue;   /* Locale \W */ | ||||||
|             } |             } | ||||||
|           break; |           break; | ||||||
|           } |           } | ||||||
|  | @ -3941,7 +3923,7 @@ for (;;) | ||||||
|         } |         } | ||||||
|       else |       else | ||||||
|         { |         { | ||||||
|         uschar *pp = eptr; |         const uschar *pp = eptr; | ||||||
|         for (i = min; i < max; i++) |         for (i = min; i < max; i++) | ||||||
|           { |           { | ||||||
|           if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break; |           if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break; | ||||||
|  | @ -3971,7 +3953,7 @@ for (;;) | ||||||
|         } |         } | ||||||
|       else |       else | ||||||
|         { |         { | ||||||
|         uschar *pp = eptr; |         const uschar *pp = eptr; | ||||||
|         for (i = min; i < max; i++) |         for (i = min; i < max; i++) | ||||||
|           { |           { | ||||||
|           if (eptr >= md->end_subject || c != *eptr) break; |           if (eptr >= md->end_subject || c != *eptr) break; | ||||||
|  | @ -4068,7 +4050,7 @@ for (;;) | ||||||
|         } |         } | ||||||
|       else |       else | ||||||
|         { |         { | ||||||
|         uschar *pp = eptr; |         const uschar *pp = eptr; | ||||||
|         for (i = min; i < max; i++) |         for (i = min; i < max; i++) | ||||||
|           { |           { | ||||||
|           if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break; |           if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break; | ||||||
|  | @ -4098,7 +4080,7 @@ for (;;) | ||||||
|         } |         } | ||||||
|       else |       else | ||||||
|         { |         { | ||||||
|         uschar *pp = eptr; |         const uschar *pp = eptr; | ||||||
|         for (i = min; i < max; i++) |         for (i = min; i < max; i++) | ||||||
|           { |           { | ||||||
|           if (eptr >= md->end_subject || c == *eptr) break; |           if (eptr >= md->end_subject || c == *eptr) break; | ||||||
|  | @ -4191,12 +4173,12 @@ for (;;) | ||||||
|       break; |       break; | ||||||
| 
 | 
 | ||||||
|       case OP_NOT_WORDCHAR_L: |       case OP_NOT_WORDCHAR_L: | ||||||
|       for (i = 1; i <= min; i++, eptr++) if (*eptr=='_' || isalpha(*eptr)) |       for (i = 1; i <= min; i++, eptr++) if (*eptr=='_' || isalnum(*eptr)) | ||||||
|         return FALSE; |         return FALSE; | ||||||
|       break; |       break; | ||||||
| 
 | 
 | ||||||
|       case OP_WORDCHAR_L: |       case OP_WORDCHAR_L: | ||||||
|       for (i = 1; i <= min; i++, eptr++) if (*eptr!='_' && !isalpha(*eptr)) |       for (i = 1; i <= min; i++, eptr++) if (*eptr!='_' && !isalnum(*eptr)) | ||||||
|         return FALSE; |         return FALSE; | ||||||
|       break; |       break; | ||||||
|       } |       } | ||||||
|  | @ -4225,7 +4207,7 @@ for (;;) | ||||||
| 
 | 
 | ||||||
|     else |     else | ||||||
|       { |       { | ||||||
|       uschar *pp = eptr; |       const uschar *pp = eptr; | ||||||
|       switch(ctype) |       switch(ctype) | ||||||
|         { |         { | ||||||
|         case OP_ANY: |         case OP_ANY: | ||||||
|  | @ -4301,7 +4283,7 @@ for (;;) | ||||||
| 	case OP_NOT_WORDCHAR_L: | 	case OP_NOT_WORDCHAR_L: | ||||||
| 	  for (i = min; i < max; i++) | 	  for (i = min; i < max; i++) | ||||||
|          { |          { | ||||||
|          if (eptr >= md->end_subject || (*eptr=='_' || isalpha(*eptr) ) ) |          if (eptr >= md->end_subject || (*eptr=='_' || isalnum(*eptr) ) ) | ||||||
|            break; |            break; | ||||||
|          eptr++; |          eptr++; | ||||||
|          } |          } | ||||||
|  | @ -4310,7 +4292,7 @@ for (;;) | ||||||
|        case OP_WORDCHAR_L: |        case OP_WORDCHAR_L: | ||||||
|        for (i = min; i < max; i++) |        for (i = min; i < max; i++) | ||||||
|          { |          { | ||||||
|          if (eptr >= md->end_subject || (*eptr!='_' && !isalpha(*eptr) ) ) |          if (eptr >= md->end_subject || (*eptr!='_' && !isalnum(*eptr) ) ) | ||||||
|              break; |              break; | ||||||
|           eptr++; |           eptr++; | ||||||
|           } |           } | ||||||
|  | @ -4399,17 +4381,20 @@ int | ||||||
| pcre_exec(const pcre *external_re, const pcre_extra *external_extra, | pcre_exec(const pcre *external_re, const pcre_extra *external_extra, | ||||||
|   const char *subject, int length, int options, int *offsets, int offsetcount) |   const char *subject, int length, int options, int *offsets, int offsetcount) | ||||||
| { | { | ||||||
| int resetcount; |   /* The "volatile" directives are to make gcc -Wall stop complaining
 | ||||||
| int ocount = offsetcount; |      that these variables can be clobbered by the longjmp.  Hopefully | ||||||
| int first_char = -1; |      they won't cost too much performance. */  | ||||||
|  | volatile int resetcount; | ||||||
|  | volatile int ocount = offsetcount; | ||||||
|  | volatile int first_char = -1; | ||||||
| match_data match_block; | match_data match_block; | ||||||
| uschar *start_bits = NULL; | volatile const uschar *start_bits = NULL; | ||||||
| uschar *start_match = (uschar *)subject; | const uschar *start_match = (uschar *)subject; | ||||||
| uschar *end_subject; | const uschar *end_subject; | ||||||
| real_pcre *re = (real_pcre *)external_re; | const real_pcre *re = (const real_pcre *)external_re; | ||||||
| real_pcre_extra *extra = (real_pcre_extra *)external_extra; | const real_pcre_extra *extra = (const real_pcre_extra *)external_extra; | ||||||
| BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0; | volatile BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0; | ||||||
| BOOL startline = (re->options & PCRE_STARTLINE) != 0; | volatile BOOL startline = (re->options & PCRE_STARTLINE) != 0; | ||||||
| 
 | 
 | ||||||
| if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION; | if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION; | ||||||
| 
 | 
 | ||||||
|  | @ -4417,7 +4402,7 @@ if (re == NULL || subject == NULL || | ||||||
|    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; |    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; | ||||||
| if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; | if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; | ||||||
| 
 | 
 | ||||||
| match_block.start_subject = (uschar *)subject; | match_block.start_subject = (const uschar *)subject; | ||||||
| match_block.end_subject = match_block.start_subject + length; | match_block.end_subject = match_block.start_subject + length; | ||||||
| end_subject = match_block.end_subject; | end_subject = match_block.end_subject; | ||||||
| 
 | 
 | ||||||
|  | @ -4626,13 +4611,3 @@ return match_block.errorcode; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /* End of pcre.c */ | /* End of pcre.c */ | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Guido van Rossum
						Guido van Rossum