Handle boundary= "foo"

git-svn: trunk@821
This commit is contained in:
Nigel Horne 2004-09-03 16:02:25 +00:00
parent 335d166377
commit 28010d29e4
3 changed files with 87 additions and 169 deletions

View file

@ -1,3 +1,9 @@
Fri Sep 3 17:00:28 BST 2004 (njh)
----------------------------------
* libclamav: Handle spaces in headers such as 'boundary= "foo"'. I believe
that a space on the RHS of the = is not RFC1521 compliant,
but Outlook Express generates them
Wed Sep 1 16:11:40 CEST 2004 (tk)
----------------------------------
* libclamav: replace current MD5 implementation with another one

View file

@ -17,6 +17,9 @@
*
* Change History:
* $Log: mbox.c,v $
* Revision 1.114 2004/09/03 15:59:00 nigelhorne
* Handle boundary= "foo"
*
* Revision 1.113 2004/08/26 09:33:20 nigelhorne
* Scan Communigate Pro files
*
@ -327,7 +330,7 @@
* Compilable under SCO; removed duplicate code with message.c
*
*/
static char const rcsid[] = "$Id: mbox.c,v 1.113 2004/08/26 09:33:20 nigelhorne Exp $";
static char const rcsid[] = "$Id: mbox.c,v 1.114 2004/09/03 15:59:00 nigelhorne Exp $";
#if HAVE_CONFIG_H
#include "clamav-config.h"
@ -439,9 +442,6 @@ static size_t strip(char *buf, int len);
static bool continuationMarker(const char *line);
static int parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg);
static void saveTextPart(message *m, const char *dir);
#if 0
static bool saveFile(const blob *b, const char *dir);
#endif
static void checkURLs(message *m, const char *dir);
#ifdef WITH_CURL
@ -746,6 +746,7 @@ parseEmailHeaders(const message *m, const table_t *rfc821)
const text *t;
message *ret;
bool anyHeadersFound = FALSE;
bool Xheader = FALSE;
cli_dbgmsg("parseEmailHeaders\n");
@ -765,30 +766,31 @@ parseEmailHeaders(const message *m, const table_t *rfc821)
else
buffer = NULL;
if(inHeader) {
if(buffer == NULL) {
/*
* A blank line signifies the end of the header
* and the start of the text
*/
cli_dbgmsg("End of header information\n");
inHeader = FALSE;
} else if(((buffer[0] == '\t') || (buffer[0] == ' ')) &&
(!Xheader)) {
/*
* Section B.2 of RFC822 says TAB or SPACE means
* a continuation of the previous entry.
*/
if(inHeader && buffer &&
((buffer[0] == '\t') || (buffer[0] == ' '))) {
/*
*
* Add all the arguments on the line
*/
const char *ptr;
char *copy = strdup(buffer);
for(ptr = strtok_r(copy, ";", &strptr); ptr; ptr = strtok_r(NULL, ":", &strptr))
messageAddArgument(ret, ptr);
if(strchr(ptr, '='))
messageAddArguments(ret, ptr);
free(copy);
} else if(inHeader) {
/*
* A blank line signifies the end of the header and
* the start of the text
*/
if(buffer == NULL) {
cli_dbgmsg("End of header information\n");
inHeader = FALSE;
} else {
Xheader = (bool)(buffer[0] == 'X');
if((parseEmailHeader(ret, buffer, rfc821) >= 0) ||
(strncasecmp(buffer, "From ", 5) == 0))
anyHeadersFound = TRUE;
@ -973,7 +975,7 @@ parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t
while((t_line = t_line->t_next) != NULL);
if(t_line == NULL) {
cli_warnmsg("Multipart MIME message contains no boundary lines\n");
cli_dbgmsg("Multipart MIME message contains no boundary lines\n");
/*
* Free added by Thomas Lamy
* <Thomas.Lamy@in-online.net>
@ -1389,7 +1391,7 @@ parseEmailBody(message *messageIn, text *textIn, const char *dir, const table_t
addAttachment = TRUE;
}
} else {
cli_warnmsg("Text type %s is not supported\n", dtype);
cli_dbgmsg("Text type %s is not supported\n", dtype);
continue;
}
break;
@ -1870,18 +1872,17 @@ static int
getTextPart(message *const messages[], size_t size)
{
size_t i;
int textpart = -1;
for(i = 0; i < size; i++) {
assert(messages[i] != NULL);
if((messageGetMimeType(messages[i]) == TEXT) &&
(strcasecmp(messageGetMimeSubtype(messages[i]), "html") == 0))
if(messageGetMimeType(messages[i]) == TEXT) {
if(strcasecmp(messageGetMimeSubtype(messages[i]), "html") == 0)
return (int)i;
textpart = (int)i;
}
for(i = 0; i < size; i++)
if(messageGetMimeType(messages[i]) == TEXT)
return (int)i;
return -1;
}
return textpart;
}
/*
@ -1981,11 +1982,10 @@ continuationMarker(const char *line)
static int
parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg)
{
const int type = tableFind(rfc821Table, cmd);
#ifdef CL_THREAD_SAFE
char *strptr;
#endif
char *copy = strdup(arg);
char *copy = strdup(arg ? arg : "");
char *ptr = copy;
if(copy == NULL)
@ -1994,7 +1994,7 @@ parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const c
cli_dbgmsg("parseMimeHeader: cmd='%s', arg='%s'\n", cmd, arg);
strstrip(copy);
switch(type) {
switch(tableFind(rfc821Table, cmd)) {
case CONTENT_TYPE:
/*
* Fix for non RFC1521 compliant mailers
@ -2020,7 +2020,7 @@ parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const c
* which I believe is illegal according to
* RFC1521
*/
cli_warnmsg("Invalid content-type '%s' received, no subtype specified, assuming text/plain; charset=us-ascii\n", copy);
cli_dbgmsg("Invalid content-type '%s' received, no subtype specified, assuming text/plain; charset=us-ascii\n", copy);
else {
/*
* Some clients are broken and
@ -2093,108 +2093,6 @@ saveTextPart(message *m, const char *dir)
}
}
#if 0
/*
 * Save the contents of a blob as a uniquely named file in the given
 * directory.
 *
 * The file name is built from the blob's own file name (or "unknown" if it
 * has none) followed by a unique tail, and then, when the blob's name ends
 * in a short ".xxx" style suffix, that suffix is appended again so that
 * programs which rely on the suffix still recognise the file type.
 *
 * b:	the blob to save; an empty blob is not written and counts as success
 * dir:	directory to create the file in, must not be NULL
 *
 * Returns TRUE if there was nothing to save or the data was written and the
 * file closed successfully, FALSE if the file could not be created or
 * written.
 *
 * TODO: don't save archive files if archive scanning is disabled, or
 *	OLE2 files if that is disabled or pattern match --exclude, but
 *	we need access to the command line options/clamav.conf here to
 *	be able to do that
 *
 * FIXME: duplicated code with fileblobSetFilename()
 */
static bool
saveFile(const blob *b, const char *dir)
{
	const unsigned long nbytes = blobGetDataSize(b);
	size_t suffixLen = 0;
	int fd;
	const char *cptr, *suffix;
	char filename[NAME_MAX + 1];

	assert(dir != NULL);

	if(nbytes == 0)
		return TRUE;

	cptr = blobGetFilename(b);

	if(cptr == NULL) {
		cptr = "unknown";
		suffix = "";
	} else {
		/*
		 * Some programs are broken and use an idea of a ".suffix"
		 * to determine the file type rather than looking up the
		 * magic number. CPM has a lot to answer for...
		 * FIXME: the suffix now appears twice in the filename...
		 */
		suffix = strrchr(cptr, '.');
		if(suffix == NULL)
			suffix = "";
		else {
			suffixLen = strlen(suffix);
			if(suffixLen > 4) {
				/* Found a full stop which isn't a suffix */
				suffix = "";
				suffixLen = 0;
			}
		}
	}
	cli_dbgmsg("Saving attachment in %s/%s\n", dir, cptr);

	/*
	 * Allow for very long filenames. We have to truncate them to fit,
	 * leaving room for the suffix that may be appended below.
	 * NOTE(review): if dir is nearly as long as the buffer the computed
	 * precision wraps; the buffer bound still holds but the XXXXXX
	 * template may be truncated - confirm callers never pass such a dir.
	 */
	snprintf(filename, sizeof(filename) - 1 - suffixLen, "%s/%.*sXXXXXX", dir,
		(int)(sizeof(filename) - 9 - suffixLen - strlen(dir)), cptr);

	/*
	 * TODO: add a HAVE_MKSTEMP property
	 */
#if	defined(C_LINUX) || defined(C_BSD) || defined(HAVE_MKSTEMP) || defined(C_SOLARIS) || defined(C_CYGWIN)
	fd = mkstemp(filename);
#else
	(void)mktemp(filename);
	fd = open(filename, O_WRONLY|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
#endif

	if(fd < 0) {
		cli_errmsg("Can't create temporary file %s: %s\n", filename, strerror(errno));
		/*
		 * All three values are size_t; cast them so that they match
		 * the conversion specifiers on every platform
		 */
		cli_dbgmsg("%lu %lu %lu\n", (unsigned long)suffixLen,
			(unsigned long)sizeof(filename),
			(unsigned long)strlen(filename));
		return FALSE;
	}

	/*
	 * Add the suffix back to the end of the filename. Tut-tut, filenames
	 * should be independent of their usage on UNIX type systems.
	 *
	 * The file that exists at this point is "filename"; "stub" is the
	 * name we want it to have, so filename is the source and stub the
	 * destination. The descriptor stays valid across the rename.
	 */
	if(suffixLen > 1) {
		char stub[NAME_MAX + 1];

		snprintf(stub, sizeof(stub), "%s%s", filename, suffix);
#ifdef	C_LINUX
		if(rename(filename, stub) == 0)
			snprintf(filename, sizeof(filename), "%s", stub);
#else
		if(link(filename, stub) == 0) {
			unlink(filename);
			snprintf(filename, sizeof(filename), "%s", stub);
		}
#endif
	}

	cli_dbgmsg("Saving attachment as %s (%lu bytes long)\n",
		filename, nbytes);

	if(cli_writen(fd, blobGetData(b), (size_t)nbytes) != nbytes) {
		perror(filename);
		close(fd);
		return FALSE;
	}

	return (close(fd) >= 0);
}
#endif
#ifdef FOLLOWURLS
static void
checkURLs(message *m, const char *dir)

View file

@ -17,6 +17,9 @@
*
* Change History:
* $Log: message.c,v $
* Revision 1.76 2004/09/03 15:59:00 nigelhorne
* Handle boundary= "foo"
*
* Revision 1.75 2004/08/23 13:15:16 nigelhorne
* messageClearMarkers
*
@ -222,7 +225,7 @@
* uuencodebegin() no longer static
*
*/
static char const rcsid[] = "$Id: message.c,v 1.75 2004/08/23 13:15:16 nigelhorne Exp $";
static char const rcsid[] = "$Id: message.c,v 1.76 2004/09/03 15:59:00 nigelhorne Exp $";
#if HAVE_CONFIG_H
#include "clamav-config.h"
@ -282,6 +285,7 @@ static unsigned char base64(char c);
static unsigned char uudecode(char c);
static const char *messageGetArgument(const message *m, int arg);
static void *messageExport(message *m, const char *dir, void *(*create)(void), void (*destroy)(void *), void (*setFilename)(void *, const char *, const char *), void (*addData)(void *, const unsigned char *, size_t), void *(*exportText)(const text *, void *));
static int usefulArg(const char *arg);
/*
* These maps are ordered in decreasing likelihood of their appearance
@ -523,20 +527,8 @@ messageAddArgument(message *m, const char *arg)
/* Empty argument? Probably a broken mail client... */
return;
/*
* These are the only arguments we're interested in.
* Do 'fgrep messageFindArgument *.c' if you don't believe me!
* It's probably not good doing this since each time a new
* messageFindArgument is added I need to remember to look here,
* but it can save a lot of memory...
*/
if((strncasecmp(arg, "name", 4) != 0) &&
(strncasecmp(arg, "filename", 8) != 0) &&
(strncasecmp(arg, "boundary", 8) != 0) &&
(strncasecmp(arg, "type", 4) != 0)) {
cli_dbgmsg("Discarding unwanted argument '%s'\n", arg);
if(!usefulArg(arg))
return;
}
cli_dbgmsg("Add argument '%s'\n", arg);
@ -598,6 +590,7 @@ messageAddArguments(message *m, const char *s)
}
key = string;
data = strchr(string, '=');
/*
@ -618,8 +611,7 @@ messageAddArguments(message *m, const char *s)
/*
* Completely broken, give up
*/
cli_warnmsg("Can't parse non RFC1521 header \"%s\"\n",
s);
cli_dbgmsg("Can't parse header \"%s\"\n", s);
return;
}
@ -629,6 +621,12 @@ messageAddArguments(message *m, const char *s)
/*
* Handle white space to the right of the equals sign
* This breaks RFC1521 which has:
* parameter := attribute "=" value
* attribute := token ; case-insensitive
* token := 1*<any (ASCII) CHAR except SPACE, CTLs,
* or tspecials>
* But too many MUAs ignore this
*/
while(isspace(*string) && (*string != '\0'))
string++;
@ -651,14 +649,21 @@ messageAddArguments(message *m, const char *s)
cptr++;
string = strchr(cptr, '"');
if((string == NULL) || (strlen(key) == 0)) {
cli_warnmsg("Can't parse header \"%s\"\n", s);
if(usefulArg(key))
cli_warnmsg("Can't parse header (1) \"%s\"\n", s);
free((char *)key);
return;
}
string++;
if(!usefulArg(key)) {
free((char *)key);
continue;
}
data = strdup(cptr);
ptr = (data) ? strchr(data, '"') : NULL;
@ -674,7 +679,7 @@ messageAddArguments(message *m, const char *s)
* TODO: the file should still be saved and
* virus checked
*/
cli_warnmsg("Can't parse header \"%s\"\n", s);
cli_warnmsg("Can't parse header (2) \"%s\"\n", s);
if(data)
free(data);
free((char *)key);
@ -683,14 +688,6 @@ messageAddArguments(message *m, const char *s)
*ptr = '\0';
#if 0
field = cli_malloc(strlen(key) + strlen(data) + 2);
if(field)
sprintf(field, "%s=%s", key, data);
free((char *)key);
free(data);
#else
field = cli_realloc((char *)key, strlen(key) + strlen(data) + 2);
if(field) {
strcat(field, "=");
@ -698,7 +695,6 @@ messageAddArguments(message *m, const char *s)
} else
free((char *)key);
free(data);
#endif
} else {
size_t len;
@ -1392,7 +1388,6 @@ messageToFileblob(message *m, const char *dir)
/*
* Decode and transfer the contents of the message into a blob
* The caller must free the returned blob
* TODO: a lot of code here is duplicated with messageToFileblob
*/
blob *
messageToBlob(message *m)
@ -1876,7 +1871,6 @@ decode(message *m, const char *in, unsigned char *out, unsigned char (*decoder)(
}
} else while(*in) {
/* Slower decoding for last line */
int nbytes;
if(m->base64chars) {
@ -1978,3 +1972,23 @@ uudecode(char c)
{
return(c - ' ');
}
/*
 * Decide whether a header argument is worth storing.
 *
 * Only arguments whose text starts (case insensitively) with one of the
 * keywords listed below are kept; anything else is reported in the debug
 * log and rejected, which can save a lot of memory.
 * Do 'fgrep messageFindArgument *.c' to see where the keywords come from:
 * each time a new messageFindArgument is added this list needs revisiting.
 *
 * Returns 1 if the argument should be kept, 0 if it should be discarded.
 */
static int
usefulArg(const char *arg)
{
	static const char *const wanted[] = {
		"name",
		"filename",
		"boundary",
		"type"
	};
	size_t i;

	/* A prefix match is enough, the argument may be "keyword=value" */
	for(i = 0; i < sizeof(wanted) / sizeof(wanted[0]); i++)
		if(strncasecmp(arg, wanted[i], strlen(wanted[i])) == 0)
			return 1;

	cli_dbgmsg("Discarding unwanted argument '%s'\n", arg);
	return 0;
}