* libclamav/htmlnorm.c: RFC2397 ("data" URL scheme) support.

* libclamav/scanner.c: scan RFC2397 data.


git-svn: trunk@1244
This commit is contained in:
Trog 2005-01-14 14:56:09 +00:00
parent f045896aaa
commit a92110df94
3 changed files with 316 additions and 68 deletions

View file

@ -1,3 +1,8 @@
Fri Jan 14 14:53:59 GMT 2005 (trog)
-----------------------------------
* libclamav/htmlnorm.c: RFC2397 ("data" URL scheme) support.
* libclamav/scanner.c: scan RFC2397 data.
Wed Jan 12 08:58:29 GMT 2005 (njh) Wed Jan 12 08:58:29 GMT 2005 (njh)
---------------------------------- ----------------------------------
* clamav-milter: Fixed DNS resolution error messages which could print * clamav-milter: Fixed DNS resolution error messages which could print

View file

@ -68,6 +68,12 @@ typedef enum {
HTML_JSDECODE_LENGTH, HTML_JSDECODE_LENGTH,
HTML_JSDECODE_DECRYPT, HTML_JSDECODE_DECRYPT,
HTML_SPECIAL_CHAR, HTML_SPECIAL_CHAR,
HTML_RFC2397_TYPE,
HTML_RFC2397_INIT,
HTML_RFC2397_DATA,
HTML_RFC2397_FINISH,
HTML_RFC2397_ESC,
HTML_ESCAPE_CHAR,
} html_state; } html_state;
typedef enum { typedef enum {
@ -383,17 +389,18 @@ void html_tag_arg_free(tag_arguments_t *tags)
static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag_arguments_t *hrefs) static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag_arguments_t *hrefs)
{ {
int fd_tmp, tag_length, tag_arg_length; int fd_tmp, tag_length, tag_arg_length, binary;
int retval=FALSE, escape, value, hex, tag_val_length, table_pos, in_script=FALSE; int retval=FALSE, escape, value, hex, tag_val_length, table_pos, in_script=FALSE;
FILE *stream_in; FILE *stream_in;
html_state state=HTML_NORM, next_state=HTML_BAD_STATE; html_state state=HTML_NORM, next_state=HTML_BAD_STATE;
char filename[1024], tag[HTML_STR_LENGTH+1], tag_arg[HTML_STR_LENGTH+1]; char filename[1024], tag[HTML_STR_LENGTH+1], tag_arg[HTML_STR_LENGTH+1];
char tag_val[HTML_STR_LENGTH+1]; char tag_val[HTML_STR_LENGTH+1], *tmp_file;
unsigned char *line, *ptr, *arg_value; unsigned char *line, *ptr, *arg_value;
tag_arguments_t tag_args; tag_arguments_t tag_args;
quoted_state quoted; quoted_state quoted;
unsigned long length; unsigned long length;
file_buff_t *file_buff_o1, *file_buff_o2, *file_buff_script; file_buff_t *file_buff_o1, *file_buff_o2, *file_buff_script;
file_buff_t *file_tmp_o1;
if (!m_area) { if (!m_area) {
if (fd < 0) { if (fd < 0) {
@ -417,6 +424,11 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
tag_args.value = NULL; tag_args.value = NULL;
if (dirname) { if (dirname) {
snprintf(filename, 1024, "%s/rfc2397", dirname);
if (mkdir(filename, 0700)) {
file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
goto abort;
}
file_buff_o1 = (file_buff_t *) cli_malloc(sizeof(file_buff_t)); file_buff_o1 = (file_buff_t *) cli_malloc(sizeof(file_buff_t));
if (!file_buff_o1) { if (!file_buff_o1) {
file_buff_o1 = file_buff_o2 = file_buff_script = NULL; file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
@ -483,18 +495,20 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
file_buff_script = NULL; file_buff_script = NULL;
} }
binary = FALSE;
ptr = line = cli_readline(stream_in, m_area, 8192); ptr = line = cli_readline(stream_in, m_area, 8192);
while (line) { while (line) {
while (*ptr && isspace(*ptr)) { while (*ptr && isspace(*ptr)) {
ptr++; ptr++;
} }
while (*ptr) { while (*ptr) {
if (*ptr == '\n') { if (!binary && *ptr == '\n') {
/* Convert it to a space and re-process */ /* Convert it to a space and re-process */
*ptr = ' '; *ptr = ' ';
continue; continue;
} }
if (*ptr == '\r') { if (!binary && *ptr == '\r') {
ptr++; ptr++;
continue; continue;
} }
@ -647,7 +661,42 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
} }
break; break;
case HTML_TAG_ARG_VAL: case HTML_TAG_ARG_VAL:
if (*ptr == '&') { if ((tag_val_length == 5) && (strncmp(tag_val, "data:", 5) == 0)) {
/* RFC2397 inline data */
/* Rewind one byte so we don't recursuive */
if (file_buff_o1 && (file_buff_o1->length > 0)) {
file_buff_o1->length--;
}
if (file_buff_o2 && (file_buff_o2->length > 0)) {
file_buff_o2->length--;
}
if (quoted != NOT_QUOTED) {
html_output_c(file_buff_o1, file_buff_o2, '"');
}
tag_val_length = 0;
state = HTML_RFC2397_TYPE;
next_state = HTML_TAG_ARG;
} else if ((tag_val_length == 6) && (strncmp(tag_val, "\"data:", 6) == 0)) {
/* RFC2397 inline data */
/* Rewind one byte so we don't recursuive */
if (file_buff_o1 && (file_buff_o1->length > 0)) {
file_buff_o1->length--;
}
if (file_buff_o2 && (file_buff_o2->length > 0)) {
file_buff_o2->length--;
}
if (quoted != NOT_QUOTED) {
html_output_c(file_buff_o1, file_buff_o2, '"');
}
tag_val_length = 0;
state = HTML_RFC2397_TYPE;
next_state = HTML_TAG_ARG;
} else if (*ptr == '&') {
state = HTML_CHAR_REF; state = HTML_CHAR_REF;
next_state = HTML_TAG_ARG_VAL; next_state = HTML_TAG_ARG_VAL;
ptr++; ptr++;
@ -923,6 +972,188 @@ static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag
ptr++; ptr++;
length--; length--;
break; break;
case HTML_RFC2397_TYPE:
if (*ptr == '\'') {
if (!escape && (quoted==SINGLE_QUOTED)) {
/* Early end of data detected. Error */
ptr++;
state = HTML_SKIP_WS;
tag_arg_length=0;
next_state = HTML_TAG_ARG;
} else {
if (tag_val_length < HTML_STR_LENGTH) {
tag_val[tag_val_length++] = '"';
}
ptr++;
}
} else if (*ptr == '"') {
if (!escape && (quoted==DOUBLE_QUOTED)) {
/* Early end of data detected. Error */
ptr++;
state = HTML_SKIP_WS;
tag_arg_length=0;
next_state = HTML_TAG_ARG;
} else {
if (tag_val_length < HTML_STR_LENGTH) {
tag_val[tag_val_length++] = '"';
}
ptr++;
}
} else if (isspace(*ptr) || (*ptr == '>')) {
if (quoted == NOT_QUOTED) {
/* Early end of data detected. Error */
state = HTML_SKIP_WS;
tag_arg_length=0;
next_state = HTML_TAG_ARG;
} else {
if (tag_val_length < HTML_STR_LENGTH) {
if (isspace(*ptr)) {
tag_val[tag_val_length++] = ' ';
} else {
tag_val[tag_val_length++] = '>';
}
}
state = HTML_SKIP_WS;
escape = FALSE;
quoted = NOT_QUOTED;
next_state = HTML_RFC2397_TYPE;
ptr++;
}
} else if (*ptr == ',') {
/* Beginning of data */
tag_val[tag_val_length] = '\0';
state = HTML_RFC2397_INIT;
escape = FALSE;
next_state = HTML_BAD_STATE;
ptr++;
} else {
if (tag_val_length < HTML_STR_LENGTH) {
tag_val[tag_val_length++] = tolower(*ptr);
}
ptr++;
}
if (*ptr == '\\') {
escape = TRUE;
} else {
escape = FALSE;
}
break;
case HTML_RFC2397_INIT:
file_tmp_o1 = (file_buff_t *) cli_malloc(sizeof(file_buff_t));
if (!file_tmp_o1) {
goto abort;
}
snprintf(filename, 1024, "%s/rfc2397", dirname);
tmp_file = cli_gentemp(filename);
cli_dbgmsg("RFC2397 data file: %s\n", tmp_file);
file_tmp_o1->fd = open(tmp_file, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);
free(tmp_file);
if (!file_tmp_o1->fd) {
cli_dbgmsg("open failed: %s\n", filename);
free(file_tmp_o1);
goto abort;
}
file_tmp_o1->length = 0;
html_output_str(file_tmp_o1, "From html-normalise\n", 20);
html_output_str(file_tmp_o1, "Content-type: ", 14);
if ((tag_val_length == 0) && (*tag_val == ';')) {
html_output_str(file_tmp_o1, "text/plain\n", 11);
}
html_output_str(file_tmp_o1, tag_val, tag_val_length);
html_output_c(file_tmp_o1, NULL, '\n');
if (strstr(tag_val, ";base64") != NULL) {
html_output_str(file_tmp_o1, "Content-transfer-encoding: base64\n", 34);
}
html_output_c(file_tmp_o1, NULL, '\n');
state = HTML_RFC2397_DATA;
binary = TRUE;
break;
case HTML_RFC2397_DATA:
if (*ptr == '&') {
state = HTML_CHAR_REF;
next_state = HTML_RFC2397_DATA;
ptr++;
} else if (*ptr == '%') {
length = 0;
value = 0;
state = HTML_ESCAPE_CHAR;
next_state = HTML_RFC2397_ESC;
ptr++;
} else if (*ptr == '\'') {
if (!escape && (quoted==SINGLE_QUOTED)) {
state = HTML_RFC2397_FINISH;
ptr++;
} else {
html_output_c(file_tmp_o1, NULL, *ptr);
ptr++;
}
} else if (*ptr == '\"') {
if (!escape && (quoted=DOUBLE_QUOTED)) {
state = HTML_RFC2397_FINISH;
ptr++;
} else {
html_output_c(file_tmp_o1, NULL, *ptr);
ptr++;
}
} else if (isspace(*ptr) || (*ptr == '>')) {
if (quoted == NOT_QUOTED) {
state = HTML_RFC2397_FINISH;
ptr++;
} else {
html_output_c(file_tmp_o1, NULL, *ptr);
ptr++;
}
} else {
html_output_c(file_tmp_o1, NULL, *ptr);
ptr++;
}
if (*ptr == '\\') {
escape = TRUE;
} else {
escape = FALSE;
}
break;
case HTML_RFC2397_FINISH:
html_output_flush(file_tmp_o1);
close(file_tmp_o1->fd);
free(file_tmp_o1);
state = HTML_SKIP_WS;
escape = FALSE;
quoted = NOT_QUOTED;
next_state = HTML_TAG_ARG;
binary = FALSE;
break;
case HTML_RFC2397_ESC:
if (length == 2) {
html_output_c(file_tmp_o1, NULL, value);
} else if (length == 1) {
html_output_c(file_tmp_o1, NULL, '%');
html_output_c(file_tmp_o1, NULL, value+'0');
} else {
html_output_c(file_tmp_o1, NULL, '%');
}
state = HTML_RFC2397_DATA;
break;
case HTML_ESCAPE_CHAR:
value *= 16;
length++;
if (isxdigit(*ptr)) {
if (isdigit(*ptr)) {
value += (*ptr - '0');
} else {
value += (tolower(*ptr) - 'a' + 10);
}
} else {
state = next_state;
}
if (length == 2) {
state = next_state;
}
ptr++;
break;
} }
} }
free(line); free(line);

View file

@ -719,67 +719,6 @@ static int cli_scanmscab(int desc, const char **virname, long int *scanned, cons
return ret; return ret;
} }
static int cli_scanhtml(int desc, const char **virname, long int *scanned, const struct cl_node *root, const struct cl_limits *limits, unsigned int options, int *arec, int *mrec)
{
char *tempname, fullname[1024];
int ret=CL_CLEAN, fd;
cli_dbgmsg("in cli_scanhtml()\n");
tempname = cli_gentemp(NULL);
if(mkdir(tempname, 0700)) {
cli_dbgmsg("ScanHTML -> Can't create temporary directory %s\n", tempname);
return CL_ETMPDIR;
}
html_normalise_fd(desc, tempname, NULL);
snprintf(fullname, 1024, "%s/comment.html", tempname);
fd = open(fullname, O_RDONLY);
if (fd >= 0) {
ret = cli_scandesc(fd, virname, scanned, root, 0, CL_TYPE_HTML);
close(fd);
}
if(ret < 0 || ret == CL_VIRUS) {
if(!cli_leavetemps_flag)
cli_rmdirs(tempname);
free(tempname);
return ret;
}
if (ret == CL_CLEAN) {
snprintf(fullname, 1024, "%s/nocomment.html", tempname);
fd = open(fullname, O_RDONLY);
if (fd >= 0) {
ret = cli_scandesc(fd, virname, scanned, root, 0, CL_TYPE_HTML);
close(fd);
}
}
if(ret < 0 || ret == CL_VIRUS) {
if(!cli_leavetemps_flag)
cli_rmdirs(tempname);
free(tempname);
return ret;
}
if (ret == CL_CLEAN) {
snprintf(fullname, 1024, "%s/script.html", tempname);
fd = open(fullname, O_RDONLY);
if (fd >= 0) {
ret = cli_scandesc(fd, virname, scanned, root, 0, CL_TYPE_HTML);
close(fd);
}
}
if(!cli_leavetemps_flag)
cli_rmdirs(tempname);
free(tempname);
return ret;
}
static int cli_scandir(const char *dirname, const char **virname, long int *scanned, const struct cl_node *root, const struct cl_limits *limits, unsigned int options, int *arec, int *mrec) static int cli_scandir(const char *dirname, const char **virname, long int *scanned, const struct cl_node *root, const struct cl_limits *limits, unsigned int options, int *arec, int *mrec)
{ {
DIR *dd; DIR *dd;
@ -986,6 +925,79 @@ static int cli_vba_scandir(const char *dirname, const char **virname, long int *
return ret; return ret;
} }
static int cli_scanhtml(int desc, const char **virname, long int *scanned, const struct cl_node *root, const struct cl_limits *limits, unsigned int options, int *arec, int *mrec)
{
char *tempname, fullname[1024];
int ret=CL_CLEAN, fd;
cli_dbgmsg("in cli_scanhtml()\n");
tempname = cli_gentemp(NULL);
if(mkdir(tempname, 0700)) {
cli_dbgmsg("ScanHTML -> Can't create temporary directory %s\n", tempname);
return CL_ETMPDIR;
}
html_normalise_fd(desc, tempname, NULL);
snprintf(fullname, 1024, "%s/comment.html", tempname);
fd = open(fullname, O_RDONLY);
if (fd >= 0) {
ret = cli_scandesc(fd, virname, scanned, root, 0, CL_TYPE_HTML);
close(fd);
}
if(ret < 0 || ret == CL_VIRUS) {
if(!cli_leavetemps_flag)
cli_rmdirs(tempname);
free(tempname);
return ret;
}
if (ret == CL_CLEAN) {
snprintf(fullname, 1024, "%s/nocomment.html", tempname);
fd = open(fullname, O_RDONLY);
if (fd >= 0) {
ret = cli_scandesc(fd, virname, scanned, root, 0, CL_TYPE_HTML);
close(fd);
}
}
if(ret < 0 || ret == CL_VIRUS) {
if(!cli_leavetemps_flag)
cli_rmdirs(tempname);
free(tempname);
return ret;
}
if (ret == CL_CLEAN) {
snprintf(fullname, 1024, "%s/script.html", tempname);
fd = open(fullname, O_RDONLY);
if (fd >= 0) {
ret = cli_scandesc(fd, virname, scanned, root, 0, CL_TYPE_HTML);
close(fd);
}
}
if(ret < 0 || ret == CL_VIRUS) {
if(!cli_leavetemps_flag)
cli_rmdirs(tempname);
free(tempname);
return ret;
}
if (ret == CL_CLEAN) {
snprintf(fullname, 1024, "%s/rfc2397", tempname);
ret = cli_scandir(fullname, virname, scanned, root, limits, options, arec, mrec);
}
if(!cli_leavetemps_flag)
cli_rmdirs(tempname);
free(tempname);
return ret;
}
static int cli_scanole2(int desc, const char **virname, long int *scanned, const struct cl_node *root, const struct cl_limits *limits, unsigned int options, int *arec, int *mrec) static int cli_scanole2(int desc, const char **virname, long int *scanned, const struct cl_node *root, const struct cl_limits *limits, unsigned int options, int *arec, int *mrec)
{ {
char *dir; char *dir;