| 
									
										
										
										
											2014-08-22 14:39:17 -04:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  *  Support for PCRE regex variant | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *  Copyright (C) 2007-2013 Sourcefire, Inc. | 
					
						
							|  |  |  |  *  Copyright (C) 2014 Cisco Systems, Inc. | 
					
						
							|  |  |  |  *  All Rights Reserved. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *  Authors: Kevin Lin | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *  This program is free software; you can redistribute it and/or modify | 
					
						
							|  |  |  |  *  it under the terms of the GNU General Public License version 2 as | 
					
						
							|  |  |  |  *  published by the Free Software Foundation. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *  This program is distributed in the hope that it will be useful, | 
					
						
							|  |  |  |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of | 
					
						
							|  |  |  |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
					
						
							|  |  |  |  *  GNU General Public License for more details. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *  You should have received a copy of the GNU General Public License | 
					
						
							|  |  |  |  *  along with this program; if not, write to the Free Software | 
					
						
							|  |  |  |  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, | 
					
						
							|  |  |  |  *  MA 02110-1301, USA. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #if HAVE_CONFIG_H
 | 
					
						
							|  |  |  | #include "clamav-config.h"
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-08-25 19:11:12 -04:00
										 |  |  | #if HAVE_PCRE
 | 
					
						
							| 
									
										
										
										
											2014-08-22 14:39:17 -04:00
										 |  |  | #include <pcre.h>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include "clamav.h"
 | 
					
						
							|  |  |  | #include "cltypes.h"
 | 
					
						
							|  |  |  | #include "others.h"
 | 
					
						
							|  |  |  | #include "regex_pcre.h"
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* TODO: redefine pcre_malloc and pcre_free */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-08-25 19:11:12 -04:00
										 |  |  | int cli_pcre_parse(struct cli_pcre_data *pd, const char *pattern) | 
					
						
							| 
									
										
										
										
											2014-08-22 14:39:17 -04:00
										 |  |  | { | 
					
						
							|  |  |  |     if (!pd || !pattern) { | 
					
						
							|  |  |  |         cli_errmsg("cli_pcre_parse: NULL pd or NULL pattern\n"); | 
					
						
							|  |  |  |         return CL_ENULLARG; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* copy expression to struct cli_pcre_data */ | 
					
						
							|  |  |  |     pd->expression = cli_strdup(pattern); | 
					
						
							|  |  |  |     if (!(pd->expression)) { | 
					
						
							|  |  |  |         cli_errmsg("cli_pcre_parse: Unable to allocate memory\n"); | 
					
						
							|  |  |  |         return CL_EMEM; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-08-25 19:11:12 -04:00
										 |  |  |     return CL_SUCCESS; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-02 17:22:22 -04:00
										 |  |  | int cli_pcre_addoptions(struct cli_pcre_data *pd, const char **opt, int errout) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     if (!pd || !opt || !(*opt)) | 
					
						
							|  |  |  |         return CL_ENULLARG; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     while (**opt != '\0') { | 
					
						
							|  |  |  |         switch(**opt) { | 
					
						
							|  |  |  |         case 'i':  pd->options |= PCRE_CASELESS;            break; | 
					
						
							|  |  |  |         case 's':  pd->options |= PCRE_DOTALL;              break; | 
					
						
							|  |  |  |         case 'm':  pd->options |= PCRE_MULTILINE;           break; | 
					
						
							|  |  |  |         case 'x':  pd->options |= PCRE_EXTENDED;            break; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             /* these are pcre specific... don't work with perl */ | 
					
						
							|  |  |  |         case 'A':  pd->options |= PCRE_ANCHORED;            break; | 
					
						
							|  |  |  |         case 'E':  pd->options |= PCRE_DOLLAR_ENDONLY;      break; | 
					
						
							|  |  |  |         case 'G':  pd->options |= PCRE_UNGREEDY;            break; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         default: | 
					
						
							|  |  |  |             if (errout) { | 
					
						
							|  |  |  |                 cli_errmsg("cli_pcre_addoptions: unknown/extra pcre option encountered %c\n", **opt); | 
					
						
							|  |  |  |                 return CL_EMALFDB; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             else | 
					
						
							|  |  |  |                 return CL_EPARSE; /* passed to caller to handle */ | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         (*opt)++; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return CL_SUCCESS; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int cli_pcre_compile(struct cli_pcre_data *pd, long long unsigned match_limit, long long unsigned match_limit_recursion, unsigned int options, int opt_override) | 
					
						
							| 
									
										
										
										
											2014-08-25 19:11:12 -04:00
										 |  |  | { | 
					
						
							|  |  |  |     const char *error; | 
					
						
							|  |  |  |     int erroffset; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (!pd || !pd->expression) { | 
					
						
							|  |  |  |         cli_errmsg("cli_pcre_compile: NULL pd or NULL pd->expression\n"); | 
					
						
							|  |  |  |         return CL_ENULLARG; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-02 17:22:22 -04:00
										 |  |  |     /* compile the pcre regex last arg is charset, allow for options override */ | 
					
						
							|  |  |  |     if (opt_override) | 
					
						
							|  |  |  |         pd->re = pcre_compile(pd->expression, options, &error, &erroffset, NULL); /* pd->re handled by libpcre -> call pcre_free() -> calls free() */ | 
					
						
							|  |  |  |     else | 
					
						
							|  |  |  |         pd->re = pcre_compile(pd->expression, pd->options, &error, &erroffset, NULL); /* pd->re handled by libpcre -> call pcre_free() -> calls free() */ | 
					
						
							| 
									
										
										
										
											2014-08-22 14:39:17 -04:00
										 |  |  |     if (pd->re == NULL) { | 
					
						
							|  |  |  |         cli_errmsg("cli_pcre_parse: PCRE compilation failed at offset %d: %s\n", erroffset, error); | 
					
						
							|  |  |  |         return CL_EPARSE; /* TODO - change ERRORCODE */ | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* now study it... (section totally not from snort) */ | 
					
						
							|  |  |  |     pd->ex = pcre_study(pd->re, 0, &error); | 
					
						
							|  |  |  |     if (!(pd->ex)) { | 
					
						
							|  |  |  |         /* TODO: this is complicated because pcre will use system malloc */ | 
					
						
							|  |  |  |         pd->ex = (pcre_extra *)cli_calloc(1, sizeof(*(pd->ex))); | 
					
						
							|  |  |  |         if (!(pd->ex)) { | 
					
						
							|  |  |  |             cli_errmsg("cli_pcre_parse: Unable to allocate memory\n"); | 
					
						
							|  |  |  |             return CL_EMEM; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-02 17:22:22 -04:00
										 |  |  |     /* set the match limits */ | 
					
						
							| 
									
										
										
										
											2014-08-22 14:39:17 -04:00
										 |  |  |     if (pd->ex->flags & PCRE_EXTRA_MATCH_LIMIT) { | 
					
						
							| 
									
										
										
										
											2014-08-25 19:11:12 -04:00
										 |  |  |         pd->ex->match_limit = match_limit; | 
					
						
							| 
									
										
										
										
											2014-08-22 14:39:17 -04:00
										 |  |  |     } | 
					
						
							|  |  |  |     else { | 
					
						
							|  |  |  |         pd->ex->flags |= PCRE_EXTRA_MATCH_LIMIT; | 
					
						
							| 
									
										
										
										
											2014-08-25 19:11:12 -04:00
										 |  |  |         pd->ex->match_limit = match_limit; | 
					
						
							| 
									
										
										
										
											2014-08-22 14:39:17 -04:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-02 17:22:22 -04:00
										 |  |  |     /* set the recursion match limits */ | 
					
						
							| 
									
										
										
										
											2014-08-22 14:39:17 -04:00
										 |  |  | #ifdef PCRE_EXTRA_MATCH_LIMIT_RECURSION
 | 
					
						
							|  |  |  |     if (pd->ex->flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) { | 
					
						
							| 
									
										
										
										
											2014-08-25 19:11:12 -04:00
										 |  |  |         pd->ex->match_limit_recursion = match_limit_recursion; | 
					
						
							| 
									
										
										
										
											2014-08-22 14:39:17 -04:00
										 |  |  |     } | 
					
						
							|  |  |  |     else { | 
					
						
							|  |  |  |         pd->ex->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; | 
					
						
							| 
									
										
										
										
											2014-08-25 19:11:12 -04:00
										 |  |  |         pd->ex->match_limit_recursion = match_limit_recursion; | 
					
						
							| 
									
										
										
										
											2014-08-22 14:39:17 -04:00
										 |  |  |     } | 
					
						
							|  |  |  | #endif /* PCRE_EXTRA_MATCH_LIMIT_RECURSION */
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* non-dynamic allocated fields set by caller */ | 
					
						
							|  |  |  |     return CL_SUCCESS; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-03 13:00:27 -04:00
										 |  |  | #define DISABLE_PCRE_REPORT 0
 | 
					
						
							|  |  |  | #define MATCH_MAXLEN 1028 /*because lolz*/
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* TODO: audit this function, how to handle the named substring name? */ | 
					
						
							|  |  |  | static void named_substr_print(struct cli_pcre_data *pd, const unsigned char *buffer, int *ovector, size_t ovlen) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     int i, j, length, namecount, trunc; | 
					
						
							|  |  |  |     unsigned char *tabptr; | 
					
						
							|  |  |  |     int name_entry_size; | 
					
						
							|  |  |  |     unsigned char *name_table; | 
					
						
							|  |  |  |     const char *start; | 
					
						
							|  |  |  |     char outstr[2*MATCH_MAXLEN+1]; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* determine if there are named substrings */ | 
					
						
							|  |  |  |     (void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMECOUNT, &namecount); | 
					
						
							|  |  |  |     if (namecount <= 0) { | 
					
						
							|  |  |  |         cli_dbgmsg("named_substr: no named substrings\n"); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     else { | 
					
						
							|  |  |  |         cli_dbgmsg("named_substr: named substrings\n"); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         /* extract named substring translation table */ | 
					
						
							|  |  |  |         (void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMETABLE, &name_table); | 
					
						
							|  |  |  |         (void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         /* print named substring information */ | 
					
						
							|  |  |  |         tabptr = name_table; | 
					
						
							|  |  |  |         for (i = 0; i < namecount; i++) { | 
					
						
							|  |  |  |             int n = (tabptr[0] << 8) | tabptr[1]; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             start = buffer + ovector[2*n]; | 
					
						
							|  |  |  |             length = ovector[2*n+1] - ovector[2*n]; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             trunc = 0; | 
					
						
							|  |  |  |             if (length > MATCH_MAXLEN) { | 
					
						
							|  |  |  |                 trunc = 1; | 
					
						
							|  |  |  |                 length = MATCH_MAXLEN; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             for (j = 0; j < length; ++j) | 
					
						
							|  |  |  |                 snprintf(outstr+(2*j), sizeof(outstr)-(2*j), "%02x", (unsigned int)*(start+j)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             cli_dbgmsg("named_substr:  (%d) %*s: %s%s\n", n, name_entry_size - 3, tabptr + 2, | 
					
						
							|  |  |  |                        outstr, trunc ? " (trunc)":""); | 
					
						
							| 
									
										
										
										
											2014-09-03 15:41:06 -04:00
										 |  |  |             /*
 | 
					
						
							|  |  |  |             cli_dbgmsg("named_substr:  (%d) %*s: %.*s%s\n", n, name_entry_size - 3, tabptr + 2, | 
					
						
							|  |  |  |                        length, start, trunc ? " (trunc)":""); | 
					
						
							|  |  |  |             */ | 
					
						
							| 
									
										
										
										
											2014-09-03 13:00:27 -04:00
										 |  |  |             tabptr += name_entry_size; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* TODO: audit this function */ | 
					
						
							| 
									
										
										
										
											2014-09-02 17:22:22 -04:00
										 |  |  | int cli_pcre_match(struct cli_pcre_data *pd, const unsigned char *buffer, uint32_t buflen, int override_offset, int options, int *ovector, size_t ovlen) | 
					
						
							| 
									
										
										
										
											2014-08-22 14:39:17 -04:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2014-09-03 13:00:27 -04:00
										 |  |  |     int rc, startoffset, i, j, length, trunc; | 
					
						
							|  |  |  |     const char *start; | 
					
						
							|  |  |  |     char outstr[2*MATCH_MAXLEN+1]; | 
					
						
							| 
									
										
										
										
											2014-08-22 17:29:40 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if (ovlen % 3) { | 
					
						
							|  |  |  |         cli_dbgmsg("cli_pcre_match: ovector length is not a multiple of 3\n"); | 
					
						
							|  |  |  |         return CL_EARG; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-03 13:00:27 -04:00
										 |  |  |     /* set the startoffset, override if a value is specified */ | 
					
						
							| 
									
										
										
										
											2014-08-26 12:31:11 -04:00
										 |  |  |     startoffset = pd->search_offset; | 
					
						
							|  |  |  |     if (override_offset >= 0) | 
					
						
							|  |  |  |         startoffset = override_offset; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-03 13:00:27 -04:00
										 |  |  |     /* execute the pcre */ | 
					
						
							| 
									
										
										
										
											2014-09-02 17:22:22 -04:00
										 |  |  |     rc = pcre_exec(pd->re, pd->ex, buffer, buflen, startoffset, options, ovector, ovlen); | 
					
						
							| 
									
										
										
										
											2014-08-22 17:29:40 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-03 13:00:27 -04:00
										 |  |  |     /* print out additional diagnostics if cli_debug_flag is set */ | 
					
						
							|  |  |  |     if (!DISABLE_PCRE_REPORT && cli_debug_flag) { | 
					
						
							|  |  |  |         cli_dbgmsg("\n"); | 
					
						
							|  |  |  |         cli_dbgmsg("cli_pcre_match: PCRE Execution Report:\n"); | 
					
						
							|  |  |  |         if (rc > 0) { | 
					
						
							|  |  |  |             /* print out full-match and capture groups */ | 
					
						
							|  |  |  |             for (i = 0; i < rc; ++i) { | 
					
						
							|  |  |  |                 start = buffer + ovector[2*i]; | 
					
						
							|  |  |  |                 length = ovector[2*i+1] - ovector[2*i]; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 trunc = 0; | 
					
						
							|  |  |  |                 if (length > MATCH_MAXLEN) { | 
					
						
							|  |  |  |                     trunc = 1; | 
					
						
							|  |  |  |                     length = MATCH_MAXLEN; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 for (j = 0; j < length; ++j)  | 
					
						
							|  |  |  |                     snprintf(outstr+(2*j), sizeof(outstr)-(2*j), "%02x", (unsigned int)*(start+j)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 cli_dbgmsg("cli_pcre_match:  %d: %s%s\n", i, outstr, trunc ? " (trunc)":""); | 
					
						
							|  |  |  |                 //cli_dbgmsg("cli_pcre_match:  %d: %.*s%s\n", i, length, start, trunc ? " (trunc)":"");
 | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             named_substr_print(pd, buffer, ovector, ovlen); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         else if (rc == 0 || rc == PCRE_ERROR_NOMATCH) { | 
					
						
							|  |  |  |             cli_dbgmsg("cli_pcre_match: no match found\n"); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         else { | 
					
						
							|  |  |  |             cli_dbgmsg("cli_pcre_match: error occurred in pcre_match: %d\n", rc); | 
					
						
							|  |  |  |             /* error handled by caller */ | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         cli_dbgmsg("cli_pcre_match: PCRE Execution Report End\n"); | 
					
						
							|  |  |  |         cli_dbgmsg("\n"); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-08-22 17:29:40 -04:00
										 |  |  |     return rc; | 
					
						
							| 
									
										
										
										
											2014-08-22 14:39:17 -04:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void cli_pcre_free_single(struct cli_pcre_data *pd) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     if (pd->re) { | 
					
						
							|  |  |  |         pcre_free(pd->re); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (pd->ex) { | 
					
						
							|  |  |  |         free(pd->ex); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (pd->expression) { | 
					
						
							|  |  |  |         free(pd->expression); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2014-08-25 15:07:30 -04:00
										 |  |  | #endif /* HAVE_PCRE */
 |