2003-07-29 15:48:06 +00:00
/*
2006-02-06 02:36:39 +00:00
* Copyright ( C ) 2002 - 2006 Nigel Horne < njh @ bandsman . co . uk >
2003-07-29 15:48:06 +00:00
*
* This program is free software ; you can redistribute it and / or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation ; either version 2 of the License , or
* ( at your option ) any later version .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
* along with this program ; if not , write to the Free Software
2006-04-09 19:59:28 +00:00
* Foundation , Inc . , 51 Franklin Street , Fifth Floor , Boston ,
* MA 02110 - 1301 , USA .
2003-07-29 15:48:06 +00:00
*/
2006-09-26 19:41:43 +00:00
static char const rcsid [ ] = " $Id: mbox.c,v 1.345 2006/09/26 19:41:43 njh Exp $ " ;
2004-02-06 13:46:08 +00:00
# if HAVE_CONFIG_H
# include "clamav-config.h"
# endif
2003-07-29 15:48:06 +00:00
# ifndef CL_DEBUG
2004-09-21 08:16:29 +00:00
# define NDEBUG /* map CLAMAV debug onto standard */
2003-07-29 15:48:06 +00:00
# endif
# ifdef CL_THREAD_SAFE
2003-12-06 04:05:18 +00:00
# ifndef _REENTRANT
2003-07-29 15:48:06 +00:00
# define _REENTRANT /* for Solaris 2.8 */
# endif
2003-12-06 04:05:18 +00:00
# endif
2003-07-29 15:48:06 +00:00
# include <stdio.h>
# include <stdlib.h>
# include <errno.h>
# include <assert.h>
# include <string.h>
2006-07-25 15:09:45 +00:00
# ifdef HAVE_STRINGS_H
2003-07-29 15:48:06 +00:00
# include <strings.h>
2006-07-25 15:09:45 +00:00
# endif
2003-07-29 15:48:06 +00:00
# include <ctype.h>
# include <time.h>
# include <fcntl.h>
2006-07-25 15:09:45 +00:00
# ifdef HAVE_SYS_PARAM_H
2003-08-02 22:37:52 +00:00
# include <sys/param.h>
2006-07-25 15:09:45 +00:00
# endif
# include "clamav.h"
# ifndef C_WINDOWS
2004-10-05 15:48:47 +00:00
# include <dirent.h>
2006-07-25 15:09:45 +00:00
# endif
2004-10-21 09:41:07 +00:00
# include <limits.h>
2006-09-21 09:37:47 +00:00
# include <signal.h>
2003-07-29 15:48:06 +00:00
2004-11-04 10:15:49 +00:00
# if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
# include <stddef.h>
# endif
2004-06-16 08:07:39 +00:00
# ifdef CL_THREAD_SAFE
# include <pthread.h>
# endif
2006-05-03 09:36:40 +00:00
# include "others.h"
# include "defaults.h"
# include "str.h"
# include "filetypes.h"
2003-07-29 15:48:06 +00:00
# include "mbox.h"
2004-06-22 04:08:02 +00:00
# ifdef CL_DEBUG
2006-09-21 09:37:47 +00:00
# include <features.h>
2004-06-22 04:08:02 +00:00
# if __GLIBC__ == 2 && __GLIBC_MINOR__ >= 1
# define HAVE_BACKTRACE
# endif
2004-06-30 14:32:28 +00:00
# endif
2004-06-22 04:08:02 +00:00
# ifdef HAVE_BACKTRACE
# include <execinfo.h>
# include <syslog.h>
static void sigsegv ( int sig ) ;
static void print_trace ( int use_syslog ) ;
# endif
2003-10-12 20:17:03 +00:00
/*
 * Map strtok_r() onto plain strtok() when the build reports that strtok_r()
 * is missing (NO_STRTOK_R) or when CL_THREAD_SAFE is not defined.
 * Any existing strtok_r/__strtok_r macros are removed first, and the third
 * (save pointer) argument is simply dropped by the replacement.
 */
# if defined(NO_STRTOK_R) || !defined(CL_THREAD_SAFE)
2003-07-29 15:48:06 +00:00
# undef strtok_r
# undef __strtok_r
# define strtok_r(a,b,c) strtok(a,b)
# endif
/*
 * required for AIX and Tru64: remove any TRUE/FALSE macros that are already
 * defined so that they cannot clash with the enum constants declared below
 */
# ifdef TRUE
# undef TRUE
# endif
# ifdef FALSE
# undef FALSE
# endif
2004-08-12 10:37:53 +00:00
/* File-local boolean type: FALSE is 0, TRUE is 1 */
typedef enum { FALSE = 0 , TRUE = 1 } bool ;
2005-12-09 17:19:10 +00:00
/*
 * Fallback for isblank() when no macro of that name is already defined:
 * non-zero (1) for a space or a horizontal tab, 0 for anything else.
 * The argument may be evaluated twice, so avoid side effects in it.
 */
#if !defined(isblank)
#define isblank(c)	((c) == ' ' || (c) == '\t')
#endif
2004-08-10 08:17:19 +00:00
# define SAVE_TO_DISC /* multipart/message are saved in a temporary file */
2004-08-17 08:31:58 +00:00
2004-09-20 08:31:56 +00:00
/*
2006-06-28 16:06:07 +00:00
* Code does exist to run FOLLOWURLS on systems without libcurl , however that
2004-09-20 08:31:56 +00:00
* is not recommended so it is not compiled by default
2005-03-16 12:14:37 +00:00
*
2005-03-16 21:27:59 +00:00
* On Solaris , when using the GNU C compiler , the clamAV build system uses the
* Sun supplied ld instead of the GNU ld causing an error . Therefore you cannot
* use WITH_CURL on Solaris with gcc , you must configure with
* " --without-libcurl " . I don ' t know if it works with Sun ' s own compiler
*
2005-03-16 12:14:37 +00:00
* Fails to link on Solaris 10 with this error :
2006-04-07 16:30:53 +00:00
* Undefined first referenced
* symbol in file
* __floatdidf / opt / sfw / lib / libcurl . s
2004-09-20 08:31:56 +00:00
*/
2005-03-16 21:27:59 +00:00
# if C_SOLARIS && __GNUC__
2005-03-16 12:14:37 +00:00
# undef WITH_CURL
# endif
2004-09-20 08:31:56 +00:00
#ifdef WITH_CURL
/*
 * Maximum number of URLs scanned in a message part. Helps to find
 * Dialer.gen-45. If not defined, don't check any URLs.
 *
 * The explanation lives above the #define (rather than trailing it) so that
 * the macro's replacement text is exactly the number and nothing else.
 */
#define FOLLOWURLS	5
#endif
2004-08-10 08:17:19 +00:00
2006-09-13 21:40:03 +00:00
# if defined(FOLLOWURLS) || defined(CL_EXPERIMENTAL)
# include "htmlnorm.h"
# endif
# ifdef CL_EXPERIMENTAL
# include "phishcheck.h"
# endif
2004-08-18 15:56:38 +00:00
# ifdef FOLLOWURLS
2004-08-18 07:46:59 +00:00
# ifdef WITH_CURL /* Set in configure */
/*
* To build with WITH_CURL :
* LDFLAGS = ` curl - config - - libs ` . / configure . . .
*/
2004-08-11 15:29:48 +00:00
# include <curl/curl.h>
2004-10-09 08:04:58 +00:00
/*
2006-01-04 09:52:56 +00:00
* Needs curl > = 7.11 ( I ' ve heard that 7.9 can cause crashes and I have seen
* 7.10 segfault , later versions can be flakey as well )
2004-10-09 08:04:58 +00:00
*
2006-06-28 16:06:07 +00:00
*
* Even 7.15 crashes , valgrind shows this :
* = = 2835 = = Warning : client switching stacks ? SP change : 0xBEB0FD2C - - > 0xD0678F0
* = = 2835 = = to suppress , use : - - max - stackframe = 1314225092 or greater
* = = 2835 = = Invalid write of size 4
* = = 2835 = = at 0x40F67BD : Curl_resolv ( in / usr / lib / libcurl . so .3 .0 .0 )
* = = 2835 = = Address 0xD0678F4 is on thread 1 ' s stack
* = = 2835 = = Can ' t extend stack to 0xD067390 during signal delivery for thread 1 :
* = = 2835 = = no stack segment
* = = 2835 = =
* = = 2835 = = Process terminating with default action of signal 11 ( SIGSEGV )
* = = 2835 = = Access not within mapped region at address 0xD067390
* = = 2835 = = at 0x40F67BD : Curl_resolv ( in / usr / lib / libcurl . so .3 .0 .0 )
*
* This bug has been reported upstream , however they claim that the bug
2006-09-21 14:42:06 +00:00
* does not exist : - ( . I have received reports that 7.15 .5 suffers from the
* same problem in Curl_resolv
*
* TODO : Drop curl and do it ourselves
2004-10-09 08:04:58 +00:00
*/
2006-01-04 09:52:56 +00:00
# if (LIBCURL_VERSION_NUM < 0x070B00)
2004-10-09 08:04:58 +00:00
# undef WITH_CURL /* also undef FOLLOWURLS? */
# endif
2006-04-19 11:33:49 +00:00
# else
# error "FOLLOWURLS without CURL is no longer supported"
2004-10-09 08:04:58 +00:00
# endif /*WITH_CURL*/
2004-08-18 15:56:38 +00:00
# else /*!FOLLOWURLS*/
# undef WITH_CURL
2004-10-09 08:04:58 +00:00
# endif /*FOLLOWURLS*/
2004-08-11 15:29:48 +00:00
2004-10-05 15:48:47 +00:00
/*
2004-11-19 11:32:16 +00:00
* Define this to handle messages covered by section 7.3 .2 of RFC1341 .
2004-10-05 15:48:47 +00:00
* This is experimental code so it is up to YOU to ( 1 ) ensure it ' s secure
2004-10-14 17:47:19 +00:00
* ( 2 ) periodically trim the directory of old files
*
* If you use the load balancing feature of clamav - milter to run clamd on
2004-11-12 09:46:01 +00:00
* more than one machine you must make sure that . . . / partial is on a shared
2004-10-14 17:47:19 +00:00
* network filesystem
2004-10-05 15:48:47 +00:00
*/
2006-07-25 15:09:45 +00:00
# ifndef C_WINDOWS /* TODO: when opendir() is done */
2004-11-12 09:46:01 +00:00
# define PARTIAL_DIR
2006-07-25 15:09:45 +00:00
# endif
2004-10-05 15:48:47 +00:00
2006-06-06 21:22:00 +00:00
/*#define NEW_WORLD*/
2005-02-16 22:20:49 +00:00
2006-06-06 16:57:00 +00:00
/*#define SCAN_UNENCODED_BOUNCES */ /*
2006-04-13 12:09:44 +00:00
* Slows things down a lot and only catches unencoded copies
2006-05-12 21:02:40 +00:00
* of EICAR within bounces , which don ' t matter
2006-04-13 12:09:44 +00:00
*/
2006-06-28 21:07:36 +00:00
/*
 * Bundle of per-scan state that is passed by pointer to parseEmailBody(),
 * do_multipart(), checkURLs() and exportBounceMessage() (see the prototypes
 * that follow).
 */
typedef struct mbox_ctx {
const char * dir ; /* NOTE(review): presumably the directory that extracted parts are saved into - confirm against callers */
const table_t * rfc821Table ; /* header-name lookup table; presumably built by initialiseTables() - confirm */
const table_t * subtypeTable ; /* MIME subtype lookup table; presumably built by initialiseTables() - confirm */
cli_ctx * ctx ; /* NOTE(review): presumably the cli_ctx handed to cli_parse_mbox() - confirm */
} mbox_ctx ;
2006-05-03 09:36:40 +00:00
static int cli_parse_mbox ( const char * dir , int desc , cli_ctx * ctx ) ;
2005-03-28 11:06:21 +00:00
static message * parseEmailFile ( FILE * fin , const table_t * rfc821Table , const char * firstLine , const char * dir ) ;
2006-07-01 16:21:03 +00:00
static message * parseEmailHeaders ( message * m , const table_t * rfc821Table ) ;
2004-01-13 10:14:13 +00:00
static int parseEmailHeader ( message * m , const char * line , const table_t * rfc821Table ) ;
2006-06-28 21:07:36 +00:00
static int parseEmailBody ( message * messageIn , text * textIn , mbox_ctx * mctx ) ;
2003-07-29 15:48:06 +00:00
static int boundaryStart ( const char * line , const char * boundary ) ;
static int endOfMessage ( const char * line , const char * boundary ) ;
static int initialiseTables ( table_t * * rfc821Table , table_t * * subtypeTable ) ;
static int getTextPart ( message * const messages [ ] , size_t size ) ;
static size_t strip ( char * buf , int len ) ;
static int parseMimeHeader ( message * m , const char * cmd , const table_t * rfc821Table , const char * arg ) ;
2006-07-03 09:19:15 +00:00
static void saveTextPart ( message * m , const char * dir , int destroy_text ) ;
2004-09-16 18:03:25 +00:00
static char * rfc2047 ( const char * in ) ;
2005-02-16 22:20:49 +00:00
static char * rfc822comments ( const char * in , char * out ) ;
2004-10-05 15:48:47 +00:00
# ifdef PARTIAL_DIR
static int rfc1341 ( message * m , const char * dir ) ;
# endif
2004-12-18 16:34:31 +00:00
static bool usefulHeader ( int commandNumber , const char * cmd ) ;
2005-07-16 15:53:29 +00:00
static char * getline_from_mbox ( char * buffer , size_t len , FILE * fin ) ;
2006-05-03 15:41:44 +00:00
static bool isBounceStart ( const char * line ) ;
2006-07-04 08:40:46 +00:00
static bool exportBinhexMessage ( const char * dir , message * m ) ;
static int exportBounceMessage ( text * start , const mbox_ctx * ctx ) ;
2006-06-28 21:07:36 +00:00
static message * do_multipart ( message * mainMessage , message * * messages , int i , int * rc , mbox_ctx * mctx , message * messageIn , text * * tptr ) ;
2006-07-12 21:21:25 +00:00
static int count_quotes ( const char * buf ) ;
2006-07-24 12:14:46 +00:00
static bool next_is_folded_header ( const text * t ) ;
2004-08-10 08:17:19 +00:00
2006-09-16 10:38:17 +00:00
static void checkURLs ( message * m , mbox_ctx * mctx , int * rc , int is_html ) ;
2006-09-13 21:40:03 +00:00
# ifdef CL_EXPERIMENTAL
static void do_checkURLs ( message * m , const char * dir , tag_arguments_t * hrefs ) ;
static blob * getHrefs ( message * m , tag_arguments_t * hrefs ) ;
static void hrefs_done ( blob * b , tag_arguments_t * hrefs ) ;
# endif
2004-08-18 07:46:59 +00:00
# ifdef WITH_CURL
2004-08-18 21:39:36 +00:00
/*
 * Argument bundle for getURL(). In CL_THREAD_SAFE builds getURL() is declared
 * as taking a void *, otherwise it takes a struct arg * directly.
 * NOTE(review): the field meanings below are inferred from their names and
 * types only; getURL() itself is outside the visible part of the file.
 */
struct arg {
2006-09-21 14:42:06 +00:00
CURL * curl ; /* libcurl handle */
2006-09-26 16:43:17 +00:00
/* url is writable only in CL_EXPERIMENTAL builds, read-only otherwise */
# ifdef CL_EXPERIMENTAL
char * url ;
# else
2004-09-30 21:50:39 +00:00
const char * url ;
2006-09-26 16:43:17 +00:00
# endif
2004-09-30 21:50:39 +00:00
const char * dir ; /* presumably the directory the download is stored in - confirm */
2004-08-18 21:39:36 +00:00
char * filename ; /* presumably the name of the file the download is stored as - confirm */
} ;
# ifdef CL_THREAD_SAFE
static void * getURL ( void * a ) ;
# else
static void * getURL ( struct arg * arg ) ;
# endif
2004-08-10 08:17:19 +00:00
# endif
2003-07-29 15:48:06 +00:00
/* Maximum line length according to RFC821 */
2005-04-28 14:46:44 +00:00
# define RFC2821LENGTH 1000
2003-07-29 15:48:06 +00:00
/* Hashcodes for our hash tables */
#define CONTENT_TYPE			1
#define CONTENT_TRANSFER_ENCODING	2
#define CONTENT_DISPOSITION		3

/*
 * Mime sub types
 *
 * Longer explanations are kept in a comment *above* the macro they describe
 * so that each macro's replacement text is just the value.
 */
#define PLAIN		1
#define ENRICHED	2
#define HTML		3
#define RICHTEXT	4
#define MIXED		5
#define ALTERNATIVE	6	/* RFC1521 */
#define DIGEST		7
#define SIGNED		8
#define PARALLEL	9
#define RELATED		10	/* RFC2387 */
#define REPORT		11	/* RFC1892 */
#define APPLEDOUBLE	12	/* Handling of this is only noddy for now */

/*
 * RFC3458
 * Drafts stated to treat it as mixed if it is not known. This disappeared
 * in the final version (except when talking about voice-message), but it is
 * good enough for us since we do no validation of coversheet presence etc.
 * (which also has disappeared in the final version)
 */
#define FAX		MIXED

/*
 * e.g. RFC2015
 *	Content-Type: multipart/encrypted;
 *		boundary="nextPart1383049.XCRrrar2yq";
 *		protocol="application/pgp-encrypted"
 */
#define ENCRYPTED	13

/*
 * BeOS, expect two parts: the file and its attributes. The attributes part
 * comes as
 *	Content-Type: application/x-be_attribute
 *		name="foo"
 * I can't find where it is defined, any pointers would be appreciated.
 * For now we treat it as multipart/related
 */
#define X_BFILE		RELATED

#define KNOWBOT		14	/* Unknown and undocumented format? */

/*
 * Seed data for the lookup tables: each entry pairs a key string with one of
 * the codes defined above, and each array is terminated by a { NULL, 0 }
 * sentinel. rfc821headers[] holds the header names of interest and
 * mimeSubtypes[] the Content-Type subtypes; several keys may share a code
 * (FAX is MIXED, X_BFILE is RELATED, all knowbot variants are KNOWBOT).
 *
 * The keys are written without any surrounding blanks: a padded literal
 * such as " plain " would not compare equal to the bare name "plain".
 */
static const struct tableinit {
	const char *key;
	int value;
} rfc821headers[] = {
	/* TODO: make these regular expressions */
	{ "Content-Type",		CONTENT_TYPE },
	{ "Content-Transfer-Encoding",	CONTENT_TRANSFER_ENCODING },
	{ "Content-Disposition",	CONTENT_DISPOSITION },
	{ NULL,				0 }
}, mimeSubtypes[] = {	/* see RFC2045 */
	/* subtypes of Text */
	{ "plain",		PLAIN },
	{ "enriched",		ENRICHED },
	{ "html",		HTML },
	{ "richtext",		RICHTEXT },
	/* subtypes of Multipart */
	{ "mixed",		MIXED },
	{ "alternative",	ALTERNATIVE },
	{ "digest",		DIGEST },
	{ "signed",		SIGNED },
	{ "parallel",		PARALLEL },
	{ "related",		RELATED },
	{ "report",		REPORT },
	{ "appledouble",	APPLEDOUBLE },
	{ "fax-message",	FAX },
	{ "encrypted",		ENCRYPTED },
	{ "x-bfile",		X_BFILE },	/* BeOS */
	{ "knowbot",		KNOWBOT },	/* ??? */
	{ "knowbot-metadata",	KNOWBOT },	/* ??? */
	{ "knowbot-code",	KNOWBOT },	/* ??? */
	{ "knowbot-state",	KNOWBOT },	/* ??? */
	{ NULL,			0 }
};
2004-06-16 08:07:39 +00:00
/*
 * Statically initialised mutex, present only in CL_THREAD_SAFE builds.
 * NOTE(review): presumably guards the lookup tables set up by
 * initialiseTables(); its users are outside the visible part of the file.
 */
# ifdef CL_THREAD_SAFE
static pthread_mutex_t tables_mutex = PTHREAD_MUTEX_INITIALIZER ;
# endif
2003-07-29 15:48:06 +00:00
2004-02-14 17:23:45 +00:00
/* Give O_BINARY the value 0 when none of the headers included above defines it */
# ifndef O_BINARY
# define O_BINARY 0
# endif
2005-02-16 22:20:49 +00:00
# ifdef NEW_WORLD
2004-12-16 15:29:08 +00:00
2006-06-07 12:30:09 +00:00
# include "matcher.h"
2006-01-02 17:40:29 +00:00
# undef PARTIAL_DIR
2004-12-16 15:29:08 +00:00
# if HAVE_MMAP
# if HAVE_SYS_MMAN_H
# include <sys/mman.h>
# else /* HAVE_SYS_MMAN_H */
# undef HAVE_MMAP
# endif
2006-01-11 09:49:03 +00:00
# else /*HAVE_MMAP*/
# undef NEW_WORLD
# endif
2004-12-16 15:29:08 +00:00
# endif
2006-01-11 09:49:03 +00:00
# ifdef NEW_WORLD
2006-01-05 11:16:27 +00:00
/*
* Files larger than this are scanned with the old method , should be
* StreamMaxLength , I guess
* If NW_MAX_FILE_SIZE is not defined , all files go through the
* new method . This definition is for machines very tight on RAM , or
* with large StreamMaxLength values
*/
# define MAX_ALLOCATION 134217728 /* see libclamav/others.c */
# define NW_MAX_FILE_SIZE MAX_ALLOCATION
2005-02-16 22:20:49 +00:00
/*
 * One encoded region located by cli_mbox() in the mailbox buffer.
 * Regions are kept on a singly linked list that cli_mbox() walks afterwards,
 * decoding each one according to its decoder field.
 */
struct scanlist {
2006-01-05 11:16:27 +00:00
const char * start ; /* set by cli_mbox() to the byte just past the encoding keyword it matched */
size_t size ; /* number of bytes in the region, counted from start */
encoding_type decoder ; /* only BASE64 and QUOTEDPRINTABLE for now */
struct scanlist * next ; /* following region, NULL on the last element */
2005-02-16 22:20:49 +00:00
} ;
2006-01-05 11:16:27 +00:00
/*
 * Node of the file-scope list used by create_map(), add_to_map(),
 * find_in_map() and free_map().
 * NOTE(review): those functions are outside the visible part of the file;
 * presumably each node records where one keyword occurs in the mailbox
 * buffer, with map as the list head and tail as its last node - confirm.
 */
static struct map {
const char * offset ; /* sorted */
const char * word ; /* presumably the keyword found at offset - confirm */
struct map * next ;
} * map , * tail ;
2006-06-07 12:30:09 +00:00
static int save_text ( cli_ctx * ctx , const char * dir , const char * start , size_t len ) ;
2006-01-05 11:16:27 +00:00
static void create_map ( const char * begin , const char * end ) ;
static void add_to_map ( const char * offset , const char * word ) ;
static const char * find_in_map ( const char * offset , const char * word ) ;
static void free_map ( void ) ;
2006-09-13 21:40:03 +00:00
2004-12-16 15:29:08 +00:00
/*
* This could be the future . Instead of parsing and decoding it just decodes .
2005-02-16 22:20:49 +00:00
*
2004-12-16 15:29:08 +00:00
* USE IT AT YOUR PERIL , a large number of viruses are not detected with this
* method , possibly because the decoded files must be exact and not have
* extra data at the start or end , which this code will produce .
2004-12-19 13:50:08 +00:00
*
2005-02-16 22:20:49 +00:00
* Currently only supports base64 and quoted - printable
*
* You may also see a lot of warnings . For the moment it falls back to old
* world mode if it doesn ' t know what to do - that ' ll be removed .
* The code is untidy . . .
*
* FIXME: Some mailbox scans are slower with this method. I suspect that it's
* because the scan can proceed to the end of the file rather than the end
* of the attachment, which can mean that later emails are scanned many times
2005-04-13 09:10:29 +00:00
*
2005-07-03 08:31:33 +00:00
* FIXME : quoted printable doesn ' t know when to stop , so size related virus
* matching breaks
*
2006-01-05 11:16:27 +00:00
* TODO : Fall through to cli_parse_mbox ( ) too often
2006-01-02 17:40:29 +00:00
*
2006-01-11 09:49:03 +00:00
* TODO : Add support for systems without mmap ( )
2006-01-02 17:40:29 +00:00
*
* TODO : partial_dir fall through
2006-06-28 16:06:07 +00:00
*
* FIXME : Some EICAR gets through
2004-12-16 15:29:08 +00:00
*/
int
2006-05-03 09:36:40 +00:00
cli_mbox ( const char * dir , int desc , cli_ctx * ctx )
2004-12-16 15:29:08 +00:00
{
2006-01-11 09:49:03 +00:00
char * start , * ptr , * line ;
const char * last , * p , * q ;
2006-01-05 12:42:39 +00:00
size_t size ;
2004-12-16 15:29:08 +00:00
struct stat statb ;
message * m ;
fileblob * fb ;
2006-06-07 12:30:09 +00:00
int ret = CL_CLEAN ;
2005-02-06 09:47:07 +00:00
int wasAlloced ;
2005-02-16 22:20:49 +00:00
struct scanlist * scanlist , * scanelem ;
2004-12-16 15:29:08 +00:00
2005-03-02 20:08:24 +00:00
if ( dir = = NULL ) {
cli_warnmsg ( " cli_mbox called with NULL dir \n " ) ;
return CL_ENULLARG ;
}
2004-12-16 15:29:08 +00:00
if ( fstat ( desc , & statb ) < 0 )
return CL_EOPEN ;
size = statb . st_size ;
if ( size = = 0 )
return CL_CLEAN ;
2006-01-05 11:16:27 +00:00
# ifdef NW_MAX_FILE_SIZE
if ( size > NW_MAX_FILE_SIZE )
2006-05-03 09:36:40 +00:00
return cli_parse_mbox ( dir , desc , ctx ) ;
2006-01-05 11:16:27 +00:00
# endif
2004-12-16 15:29:08 +00:00
2006-01-02 17:40:29 +00:00
/*cli_warnmsg("NEW_WORLD is new code - use at your own risk.\n");*/
2005-07-03 08:31:33 +00:00
# ifdef PARTIAL_DIR
cli_warnmsg ( " PARTIAL_DIR doesn't work in the NEW_WORLD yet \n " ) ;
# endif
2005-02-17 19:06:32 +00:00
2005-02-16 22:20:49 +00:00
start = mmap ( NULL , size , PROT_READ , MAP_PRIVATE , desc , 0 ) ;
if ( start = = MAP_FAILED )
2004-12-16 15:29:08 +00:00
return CL_EMEM ;
cli_dbgmsg ( " mmap'ed mbox \n " ) ;
2005-02-16 22:20:49 +00:00
ptr = cli_malloc ( size ) ;
if ( ptr ) {
memcpy ( ptr , start , size ) ;
2005-02-06 09:47:07 +00:00
munmap ( start , size ) ;
2005-02-16 22:20:49 +00:00
start = ptr ;
2006-01-05 11:16:27 +00:00
wasAlloced = 1 ;
2005-02-06 09:47:07 +00:00
} else
wasAlloced = 0 ;
2006-01-05 11:16:27 +00:00
/* last points to the last *valid* address in the array */
last = & start [ size - 1 ] ;
create_map ( start , last ) ;
2005-02-16 22:20:49 +00:00
scanelem = scanlist = NULL ;
q = start ;
2006-01-05 11:16:27 +00:00
/*
* FIXME : mismatch of const char * and char * here and in later calls
* to find_in_map ( )
*/
while ( ( p = find_in_map ( q , " base64 " ) ) ! = NULL ) {
2005-02-16 22:20:49 +00:00
cli_dbgmsg ( " Found base64 \n " ) ;
if ( scanelem ) {
scanelem - > next = cli_malloc ( sizeof ( struct scanlist ) ) ;
scanelem = scanelem - > next ;
} else
scanlist = scanelem = cli_malloc ( sizeof ( struct scanlist ) ) ;
scanelem - > next = NULL ;
scanelem - > decoder = BASE64 ;
q = scanelem - > start = & p [ 6 ] ;
2006-01-05 11:16:27 +00:00
if ( ( ( p = find_in_map ( q , " \n From " ) ) ! = NULL ) | |
( ( p = find_in_map ( q , " base64 " ) ) ! = NULL ) | |
( ( p = find_in_map ( q , " quoted-printable " ) ) ! = NULL ) ) {
2005-03-18 18:12:25 +00:00
scanelem - > size = ( size_t ) ( p - q ) ;
2005-02-16 22:20:49 +00:00
q = p ;
2005-04-13 09:10:29 +00:00
} else {
scanelem - > size = ( size_t ) ( last - scanelem - > start ) + 1 ;
break ;
}
2006-01-05 12:42:39 +00:00
cli_dbgmsg ( " base64: last %u q %u \n " , ( unsigned int ) last , ( unsigned int ) q ) ;
2005-02-16 22:20:49 +00:00
assert ( scanelem - > size < = size ) ;
2004-12-16 15:29:08 +00:00
}
2006-01-02 17:40:29 +00:00
2005-02-16 22:20:49 +00:00
q = start ;
2006-01-05 11:16:27 +00:00
while ( ( p = find_in_map ( q , " quoted-printable " ) ) ! = NULL ) {
2005-04-13 09:10:29 +00:00
if ( p ! = q )
switch ( p [ - 1 ] ) {
case ' ' :
case ' : ' :
case ' = ' : /* wrong but allow it */
break ;
default :
q = & p [ 16 ] ;
cli_dbgmsg ( " Ignore quoted-printable false positive \n " ) ;
continue ; /* false positive */
}
2005-04-19 09:23:12 +00:00
2005-02-16 22:20:49 +00:00
cli_dbgmsg ( " Found quoted-printable \n " ) ;
2006-01-02 17:40:29 +00:00
# ifdef notdef
/*
* The problem with quoted printable is recognising when to stop
* parsing
*/
2005-02-16 22:20:49 +00:00
if ( scanelem ) {
scanelem - > next = cli_malloc ( sizeof ( struct scanlist ) ) ;
scanelem = scanelem - > next ;
} else
scanlist = scanelem = cli_malloc ( sizeof ( struct scanlist ) ) ;
scanelem - > next = NULL ;
scanelem - > decoder = QUOTEDPRINTABLE ;
q = scanelem - > start = & p [ 16 ] ;
2006-01-05 12:42:39 +00:00
cli_dbgmsg ( " qp: last %u q %u \n " , ( unsigned int ) last , ( unsigned int ) q ) ;
2006-01-05 11:16:27 +00:00
if ( ( ( p = find_in_map ( q , " \n From " ) ) ! = NULL ) | |
( ( p = find_in_map ( q , " quoted-printable " ) ) ! = NULL ) | |
( ( p = find_in_map ( q , " base64 " ) ) ! = NULL ) ) {
2005-03-18 18:12:25 +00:00
scanelem - > size = ( size_t ) ( p - q ) ;
2005-02-16 22:20:49 +00:00
q = p ;
2005-04-13 09:10:29 +00:00
cli_dbgmsg ( " qp: scanelem->size = %u \n " , scanelem - > size ) ;
} else {
scanelem - > size = ( size_t ) ( last - scanelem - > start ) + 1 ;
break ;
}
2005-02-16 22:20:49 +00:00
assert ( scanelem - > size < = size ) ;
2006-01-02 17:40:29 +00:00
# else
if ( wasAlloced )
free ( start ) ;
else
munmap ( start , size ) ;
2006-01-05 11:16:27 +00:00
free_map ( ) ;
2006-05-03 09:36:40 +00:00
return cli_parse_mbox ( dir , desc , ctx ) ;
2006-01-02 17:40:29 +00:00
# endif
2004-12-16 15:29:08 +00:00
}
2005-02-16 22:20:49 +00:00
if ( scanlist = = NULL ) {
const struct tableinit * tableinit ;
bool anyHeadersFound = FALSE ;
2005-04-13 09:10:29 +00:00
bool hasuuencode = FALSE ;
2006-01-02 17:40:29 +00:00
cli_file_t type ;
2005-02-16 22:20:49 +00:00
/* FIXME: message: There could of course be no decoder needed... */
for ( tableinit = rfc821headers ; tableinit - > key ; tableinit + + )
2006-01-05 11:16:27 +00:00
if ( find_in_map ( start , tableinit - > key ) ) {
2005-02-16 22:20:49 +00:00
anyHeadersFound = TRUE ;
break ;
}
2006-06-07 12:30:09 +00:00
if ( ( ! anyHeadersFound ) & &
( ( p = find_in_map ( start , " \n begin " ) ) ! = NULL ) & &
( isuuencodebegin ( + + p ) ) )
2005-04-13 09:10:29 +00:00
/* uuencoded part */
hasuuencode = TRUE ;
2006-06-07 12:30:09 +00:00
else {
cli_dbgmsg ( " Nothing encoded, looking for a text part to save \n " ) ;
ret = save_text ( ctx , dir , start , size ) ;
if ( wasAlloced )
free ( start ) ;
else
munmap ( start , size ) ;
free_map ( ) ;
if ( ret ! = CL_EFORMAT )
return ret ;
ret = CL_CLEAN ;
}
2005-04-13 09:10:29 +00:00
2006-01-05 11:16:27 +00:00
free_map ( ) ;
2006-01-02 17:40:29 +00:00
type = cli_filetype ( start , size ) ;
if ( ( type = = CL_TYPE_UNKNOWN_TEXT ) & &
( strncmp ( start , " Microsoft Mail Internet Headers " , 31 ) = = 0 ) )
2006-01-05 11:16:27 +00:00
type = CL_TYPE_MAIL ;
2006-01-02 17:40:29 +00:00
2005-02-06 09:47:07 +00:00
if ( wasAlloced )
free ( start ) ;
else
munmap ( start , size ) ;
2004-12-16 15:29:08 +00:00
2005-04-13 09:10:29 +00:00
if ( anyHeadersFound | | hasuuencode ) {
/* TODO: reduce the number of falls through here */
2006-01-02 17:40:29 +00:00
if ( hasuuencode )
2006-06-07 12:30:09 +00:00
/* TODO: fast track visa */
cli_warnmsg ( " New world - fall back to old uudecoder \n " ) ;
2006-01-02 17:40:29 +00:00
else
2006-06-07 12:30:09 +00:00
cli_warnmsg ( " cli_mbox: unknown encoder, type %d \n " , type ) ;
2006-01-02 17:40:29 +00:00
if ( type = = CL_TYPE_MAIL )
2006-05-03 09:36:40 +00:00
return cli_parse_mbox ( dir , desc , ctx ) ;
2006-01-02 17:40:29 +00:00
cli_dbgmsg ( " Unknown filetype %d, return CLEAN \n " , type ) ;
return CL_CLEAN ;
2004-12-19 13:50:08 +00:00
}
2005-04-13 09:10:29 +00:00
2006-06-07 12:30:09 +00:00
#if 0 /* I don't believe this is needed any more */
2006-06-06 20:55:20 +00:00
/*
* The message could be a plain text phish
* FIXME : Can ' t get to the option whether we are looking for
* phishes or not , so assume we are , this slows things a
* lot
* Should be
* if ( ( type = = CL_TYPE_MAIL ) & & ( ! ( no - phishing ) )
*/
if ( type = = CL_TYPE_MAIL )
2006-05-03 09:36:40 +00:00
return cli_parse_mbox ( dir , desc , ctx ) ;
2006-06-07 12:30:09 +00:00
# endif
2006-06-06 20:55:20 +00:00
cli_dbgmsg ( " cli_mbox: I believe it's plain text (type == %d) which must be clean \n " ,
type ) ;
2005-02-16 22:20:49 +00:00
return CL_CLEAN ;
}
2006-01-05 11:16:27 +00:00
#if 0
if ( wasAlloced ) {
const char * max = NULL ;
for ( scanelem = scanlist ; scanelem ; scanelem = scanelem - > next ) {
const char * end = & scanelem - > start [ scanelem - > size ] ;
if ( end > max )
max = end ;
}
if ( max < last )
printf ( " could free %d bytes \n " , ( int ) ( last - max ) ) ;
}
# endif
2005-02-16 22:20:49 +00:00
for ( scanelem = scanlist ; scanelem ; scanelem = scanelem - > next ) {
if ( scanelem - > decoder = = BASE64 ) {
2006-01-05 11:16:27 +00:00
const char * b64start = scanelem - > start ;
size_t b64size = scanelem - > size ;
2005-02-16 22:20:49 +00:00
cli_dbgmsg ( " b64size = %lu \n " , b64size ) ;
2005-07-02 21:05:36 +00:00
while ( ( * b64start ! = ' \n ' ) & & ( * b64start ! = ' \r ' ) ) {
2004-12-19 13:50:08 +00:00
b64start + + ;
b64size - - ;
2005-02-16 22:20:49 +00:00
}
/*
* Look for the end of the headers
*/
while ( b64start < last ) {
if ( * b64start = = ' ; ' ) {
2004-12-19 13:50:08 +00:00
b64start + + ;
b64size - - ;
2005-07-02 21:05:36 +00:00
} else if ( ( memcmp ( b64start , " \n \n " , 2 ) = = 0 ) | |
( memcmp ( b64start , " \r \r " , 2 ) = = 0 ) ) {
b64start + = 2 ;
b64size - = 2 ;
break ;
} else if ( memcmp ( b64start , " \r \n \r \n " , 4 ) = = 0 ) {
b64start + = 4 ;
b64size - = 4 ;
break ;
2006-01-02 17:40:29 +00:00
} else if ( memcmp ( b64start , " \n \n " , 3 ) = = 0 ) {
/*
* Some viruses are broken and have
* one space character at the end of
* the headers
*/
b64start + = 3 ;
b64size - = 3 ;
break ;
} else if ( memcmp ( b64start , " \r \n \r \n " , 5 ) = = 0 ) {
/*
* Some viruses are broken and have
* one space character at the end of
* the headers
*/
b64start + = 5 ;
b64size - = 5 ;
break ;
2004-12-19 13:50:08 +00:00
}
2005-02-06 18:25:10 +00:00
b64start + + ;
2005-02-16 22:20:49 +00:00
b64size - - ;
2005-02-06 18:25:10 +00:00
}
2004-12-19 13:50:08 +00:00
2005-02-16 22:20:49 +00:00
if ( b64size > 0L )
2005-04-13 09:10:29 +00:00
while ( ( ! isalnum ( * b64start ) ) & & ( * b64start ! = ' / ' ) ) {
2005-02-16 22:20:49 +00:00
if ( b64size - - = = 0L )
break ;
b64start + + ;
}
if ( b64size > 0L ) {
2006-04-20 10:25:47 +00:00
int lastline ;
2006-04-07 16:30:53 +00:00
char * tmpfilename ;
unsigned char * uptr ;
2005-02-16 22:20:49 +00:00
cli_dbgmsg ( " cli_mbox: decoding %ld base64 bytes \n " , b64size ) ;
2006-04-20 10:25:47 +00:00
if ( ( fb = fileblobCreate ( ) ) = = NULL ) {
2006-06-07 12:30:09 +00:00
free_map ( ) ;
2006-04-07 16:30:53 +00:00
if ( wasAlloced )
free ( start ) ;
else
munmap ( start , size ) ;
return CL_EMEM ;
}
2006-04-20 10:25:47 +00:00
tmpfilename = cli_gentemp ( dir ) ;
2006-06-07 12:30:09 +00:00
if ( tmpfilename = = NULL ) {
free_map ( ) ;
2006-04-07 16:30:53 +00:00
if ( wasAlloced )
free ( start ) ;
else
munmap ( start , size ) ;
2006-04-20 10:25:47 +00:00
fileblobDestroy ( fb ) ;
2006-04-07 16:30:53 +00:00
2006-04-20 10:25:47 +00:00
return CL_EMEM ;
2006-04-07 16:30:53 +00:00
}
2006-04-20 10:25:47 +00:00
fileblobSetFilename ( fb , dir , tmpfilename ) ;
free ( tmpfilename ) ;
2006-04-07 16:30:53 +00:00
2005-02-16 22:20:49 +00:00
line = NULL ;
2004-12-19 13:50:08 +00:00
2005-02-16 22:20:49 +00:00
m = messageCreate ( ) ;
2006-01-02 17:40:29 +00:00
if ( m = = NULL ) {
2006-06-07 12:30:09 +00:00
free_map ( ) ;
2006-01-02 17:40:29 +00:00
if ( wasAlloced )
free ( start ) ;
else
munmap ( start , size ) ;
2006-04-20 10:25:47 +00:00
fileblobDestroy ( fb ) ;
2006-01-02 17:40:29 +00:00
2005-02-16 22:20:49 +00:00
return CL_EMEM ;
2006-01-02 17:40:29 +00:00
}
2005-02-16 22:20:49 +00:00
messageSetEncoding ( m , " base64 " ) ;
2004-12-19 13:50:08 +00:00
2006-05-03 15:41:44 +00:00
messageSetCTX ( m , ctx ) ;
fileblobSetCTX ( fb , ctx ) ;
2005-07-02 21:05:36 +00:00
lastline = 0 ;
2005-04-13 09:10:29 +00:00
do {
2006-04-07 16:30:53 +00:00
int length = 0 , datalen ;
2005-07-02 21:05:36 +00:00
char * newline , * equal ;
2006-04-07 16:30:53 +00:00
unsigned char * bigbuf , * data ;
unsigned char smallbuf [ 1024 ] ;
2006-06-06 20:55:20 +00:00
const char * cptr ;
2004-12-19 13:50:08 +00:00
2005-02-16 22:20:49 +00:00
/*printf("%ld: ", b64size); fflush(stdout);*/
2004-12-19 13:50:08 +00:00
2006-06-06 20:55:20 +00:00
for ( cptr = b64start ; b64size & & ( * cptr ! = ' \n ' ) & & ( * cptr ! = ' \r ' ) ; cptr + + ) {
2005-02-16 22:20:49 +00:00
length + + ;
- - b64size ;
}
2004-12-19 13:50:08 +00:00
2005-02-16 22:20:49 +00:00
/*printf("%d: ", length); fflush(stdout);*/
2004-12-19 13:50:08 +00:00
2005-05-13 19:43:37 +00:00
newline = cli_realloc ( line , length + 1 ) ;
if ( newline = = NULL )
break ;
line = newline ;
2004-12-19 13:50:08 +00:00
2005-02-16 22:20:49 +00:00
memcpy ( line , b64start , length ) ;
line [ length ] = ' \0 ' ;
2004-12-19 13:50:08 +00:00
2005-07-02 21:05:36 +00:00
equal = strchr ( line , ' = ' ) ;
if ( equal ) {
lastline + + ;
* equal = ' \0 ' ;
}
2005-02-16 22:20:49 +00:00
/*puts(line);*/
2004-12-19 13:50:08 +00:00
2006-04-07 16:30:53 +00:00
#if 0
2005-02-16 22:20:49 +00:00
if ( messageAddStr ( m , line ) < 0 )
break ;
2006-04-07 16:30:53 +00:00
# endif
2006-04-20 10:25:47 +00:00
if ( length > = ( int ) sizeof ( smallbuf ) ) {
2006-04-07 16:30:53 +00:00
datalen = length + 2 ;
data = bigbuf = cli_malloc ( datalen ) ;
if ( data = = NULL )
break ;
} else {
bigbuf = NULL ;
data = smallbuf ;
datalen = sizeof ( data ) - 1 ;
}
uptr = decodeLine ( m , BASE64 , line , data , datalen ) ;
if ( uptr = = NULL ) {
if ( bigbuf )
free ( bigbuf ) ;
break ;
}
/*cli_dbgmsg("base64: write %u bytes\n", (size_t)(uptr - data));*/
2006-04-20 10:25:47 +00:00
datalen = fileblobAddData ( fb , data , ( size_t ) ( uptr - data ) ) ;
2006-04-07 16:30:53 +00:00
if ( bigbuf )
free ( bigbuf ) ;
2004-12-19 13:50:08 +00:00
2006-04-20 10:25:47 +00:00
if ( datalen < 0 )
break ;
2006-05-19 11:03:22 +00:00
if ( fileblobContainsVirus ( fb ) )
2006-05-04 10:37:03 +00:00
break ;
2006-04-20 10:25:47 +00:00
2006-06-06 20:55:20 +00:00
if ( ( b64size > 0 ) & & ( * cptr = = ' \r ' ) ) {
b64start = + + cptr ;
2005-07-02 21:05:36 +00:00
- - b64size ;
}
2006-06-06 20:55:20 +00:00
if ( ( b64size > 0 ) & & ( * cptr = = ' \n ' ) ) {
b64start = + + cptr ;
2005-02-16 22:20:49 +00:00
- - b64size ;
}
2005-07-02 21:05:36 +00:00
if ( lastline )
2005-02-16 22:20:49 +00:00
break ;
2005-04-13 09:10:29 +00:00
} while ( b64size > 0L ) ;
2006-04-07 16:30:53 +00:00
if ( m - > base64chars ) {
unsigned char data [ 4 ] ;
uptr = base64Flush ( m , data ) ;
if ( uptr ) {
/*cli_dbgmsg("base64: flush %u bytes\n", (size_t)(uptr - data));*/
2006-04-20 10:25:47 +00:00
( void ) fileblobAddData ( fb , data , ( size_t ) ( uptr - data ) ) ;
2006-04-07 16:30:53 +00:00
}
}
2006-04-20 10:25:47 +00:00
if ( fb )
fileblobDestroy ( fb ) ;
else
2006-05-19 11:03:22 +00:00
ret = - 1 ;
2006-04-07 16:30:53 +00:00
messageDestroy ( m ) ;
free ( line ) ;
2004-12-16 15:29:08 +00:00
}
2005-02-16 22:20:49 +00:00
} else if ( scanelem - > decoder = = QUOTEDPRINTABLE ) {
2006-01-05 11:16:27 +00:00
const char * quotedstart = scanelem - > start ;
size_t quotedsize = scanelem - > size ;
2004-12-19 13:50:08 +00:00
2005-02-16 22:20:49 +00:00
cli_dbgmsg ( " quotedsize = %lu \n " , quotedsize ) ;
while ( * quotedstart ! = ' \n ' ) {
2004-12-19 13:50:08 +00:00
quotedstart + + ;
quotedsize - - ;
2005-02-16 22:20:49 +00:00
}
/*
* Look for the end of the headers
*/
while ( quotedstart < last ) {
if ( * quotedstart = = ' ; ' ) {
2004-12-19 13:50:08 +00:00
quotedstart + + ;
quotedsize - - ;
2005-07-02 21:05:36 +00:00
} else if ( ( * quotedstart = = ' \n ' ) | | ( * quotedstart = = ' \r ' ) ) {
2005-02-16 22:20:49 +00:00
quotedstart + + ;
quotedsize - - ;
if ( ( * quotedstart = = ' \n ' ) | | ( * quotedstart = = ' \r ' ) ) {
quotedstart + + ;
quotedsize - - ;
break ;
}
2004-12-19 13:50:08 +00:00
}
2005-02-16 22:20:49 +00:00
quotedstart + + ;
quotedsize - - ;
2004-12-19 13:50:08 +00:00
}
2004-12-16 15:29:08 +00:00
2005-02-16 22:20:49 +00:00
while ( ! isalnum ( * quotedstart ) ) {
quotedstart + + ;
quotedsize - - ;
}
2004-12-16 15:29:08 +00:00
2005-02-16 22:20:49 +00:00
if ( quotedsize > 0L ) {
cli_dbgmsg ( " cli_mbox: decoding %ld quoted-printable bytes \n " , quotedsize ) ;
2004-12-16 15:29:08 +00:00
2005-02-16 22:20:49 +00:00
m = messageCreate ( ) ;
2006-01-02 17:40:29 +00:00
if ( m = = NULL ) {
2006-06-07 12:30:09 +00:00
free_map ( ) ;
2006-01-02 17:40:29 +00:00
if ( wasAlloced )
free ( start ) ;
else
munmap ( start , size ) ;
2005-02-16 22:20:49 +00:00
return CL_EMEM ;
2006-01-02 17:40:29 +00:00
}
2005-02-16 22:20:49 +00:00
messageSetEncoding ( m , " quoted-printable " ) ;
2006-05-03 15:41:44 +00:00
messageSetCTX ( m , ctx ) ;
2004-12-16 15:29:08 +00:00
2005-02-16 22:20:49 +00:00
line = NULL ;
2004-12-16 15:29:08 +00:00
2005-04-13 09:10:29 +00:00
do {
2005-02-16 22:20:49 +00:00
int length = 0 ;
2005-05-13 19:43:37 +00:00
char * newline ;
2006-06-06 20:55:20 +00:00
const char * cptr ;
2004-12-16 15:29:08 +00:00
2005-02-16 22:20:49 +00:00
/*printf("%ld: ", quotedsize); fflush(stdout);*/
2004-12-16 15:29:08 +00:00
2006-06-06 20:55:20 +00:00
for ( cptr = quotedstart ; quotedsize & & ( * cptr ! = ' \n ' ) & & ( * cptr ! = ' \r ' ) ; cptr + + ) {
2005-02-16 22:20:49 +00:00
length + + ;
- - quotedsize ;
}
2004-12-16 15:29:08 +00:00
2005-02-16 22:20:49 +00:00
/*printf("%d: ", length); fflush(stdout);*/
2004-12-16 15:29:08 +00:00
2005-05-13 19:43:37 +00:00
newline = cli_realloc ( line , length + 1 ) ;
if ( newline = = NULL )
break ;
line = newline ;
2004-12-16 15:29:08 +00:00
2005-02-16 22:20:49 +00:00
memcpy ( line , quotedstart , length ) ;
line [ length ] = ' \0 ' ;
2004-12-16 15:29:08 +00:00
2005-02-16 22:20:49 +00:00
/*puts(line);*/
2004-12-16 15:29:08 +00:00
2005-02-16 22:20:49 +00:00
if ( messageAddStr ( m , line ) < 0 )
break ;
2004-12-19 13:50:08 +00:00
2006-06-06 20:55:20 +00:00
if ( ( quotedsize > 0 ) & & ( * cptr = = ' \r ' ) ) {
quotedstart = + + cptr ;
2005-07-02 21:05:36 +00:00
- - quotedsize ;
}
2006-06-06 20:55:20 +00:00
if ( ( quotedsize > 0 ) & & ( * cptr = = ' \n ' ) ) {
quotedstart = + + cptr ;
2005-02-16 22:20:49 +00:00
- - quotedsize ;
}
2005-04-13 09:10:29 +00:00
} while ( quotedsize > 0L ) ;
2005-02-16 22:20:49 +00:00
free ( line ) ;
2006-07-01 16:21:03 +00:00
fb = messageToFileblob ( m , dir , 1 ) ;
2005-02-16 22:20:49 +00:00
messageDestroy ( m ) ;
2004-12-19 13:50:08 +00:00
2006-05-19 11:03:22 +00:00
if ( fb )
2005-02-16 22:20:49 +00:00
fileblobDestroy ( fb ) ;
2006-05-19 11:03:22 +00:00
else
ret = - 1 ;
2005-02-16 22:20:49 +00:00
}
2004-12-16 15:29:08 +00:00
}
}
2005-02-16 22:20:49 +00:00
scanelem = scanlist ;
2006-06-07 12:30:09 +00:00
/*
* There could be a phish in the plain text part , so save that
* FIXME : Can ' t get to the option whether we are looking for
* phishes or not , so assume we are , this slows things a
* lot
* Should be
* if ( ( type = = CL_TYPE_MAIL ) & & ( ! ( no - phishing ) )
*/
ret = save_text ( ctx , dir , start , size ) ;
free_map ( ) ;
2005-02-16 22:20:49 +00:00
while ( scanelem ) {
struct scanlist * n = scanelem - > next ;
free ( scanelem ) ;
scanelem = n ;
}
2004-12-16 15:29:08 +00:00
2005-02-06 09:47:07 +00:00
if ( wasAlloced )
free ( start ) ;
else
munmap ( start , size ) ;
2004-12-16 15:29:08 +00:00
2005-02-06 09:47:07 +00:00
/*
* FIXME : Need to run cl_scandir ( ) here and return that value
*/
2006-04-07 16:30:53 +00:00
cli_dbgmsg ( " cli_mbox: ret = %d \n " , ret ) ;
2006-06-07 12:30:09 +00:00
if ( ret ! = CL_EFORMAT )
return ret ;
2004-12-19 13:50:08 +00:00
2006-06-07 12:30:09 +00:00
cli_warnmsg ( " New world - don't know what to do - fall back to old world \n " ) ;
2005-02-16 22:20:49 +00:00
/* Fall back for now */
2005-04-13 09:10:29 +00:00
lseek ( desc , 0L , SEEK_SET ) ;
2006-05-03 09:36:40 +00:00
return cli_parse_mbox ( dir , desc , ctx ) ;
2004-12-16 15:29:08 +00:00
}
2006-01-05 11:16:27 +00:00
2006-06-07 12:30:09 +00:00
/*
 * Save a text part - it could contain phish or jscript
 *
 * Uses the keyword map (see create_map/find_in_map) to locate the first
 * blank line at or after 'start', picks the span of plain text to keep,
 * scans that span with cli_scanbuff() and then writes it to a temporary
 * file in 'dir' through a fileblob.
 *
 *	ctx	scan context; ctx->scanned (if set) is incremented and
 *		ctx->virname/ctx->engine are handed to cli_scanbuff()
 *	dir	directory in which the temporary file is created
 *	start	start of the buffer holding the message
 *	len	number of bytes available at 'start'
 *
 * Returns CL_EFORMAT if no blank line is found or the text span is shorter
 * than 5 bytes, CL_VIRUS if cli_scanbuff() reports one, CL_EMEM if the
 * fileblob can't be created, CL_ETMPFILE if no temporary name can be
 * generated, otherwise CL_SUCCESS.
 */
static int
save_text(cli_ctx *ctx, const char *dir, const char *start, size_t len)
{
	const char *p, *q;
	fileblob *fb;
	char *tmpfilename;

	if(((p = find_in_map(start, "\n\n")) == NULL) &&
	   ((p = find_in_map(start, "\r\n\r\n")) == NULL)) {
		cli_dbgmsg("No text part found to save\n");
		return CL_EFORMAT;
	}

	/*
	 * The map only ever holds "quoted-printable" (with a hyphen), so
	 * that is the spelling that must be looked up here
	 */
	if(((q = find_in_map(start, "base64")) == NULL) &&
	   ((q = find_in_map(start, "quoted-printable")) == NULL)) {
		cli_dbgmsg("It's all plain text!\n");
		/* Step over the blank line that was matched */
		if(*p == '\r')
			p += 4;
		else
			p += 2;
		len -= (size_t)(p - start);
	} else if(((q = find_in_map(p, "\nFrom ")) == NULL) &&
		  ((q = find_in_map(p, "base64")) == NULL) &&
		  ((q = find_in_map(p, "quoted-printable")) == NULL)) {
		cli_dbgmsg("Can't find end of plain text - assume it's all\n");
		/*
		 * len is counted from start but the data is read from p,
		 * so drop the bytes before p to stay inside the buffer
		 */
		len -= (size_t)(p - start);
	} else
		len = (size_t)(q - p);

	if(len < 5) {
		cli_dbgmsg("save_text: Too small\n");
		return CL_EFORMAT;
	}
	if(ctx->scanned)
		*ctx->scanned += len / CL_COUNT_PRECISION;

	/*
	 * This doesn't work, cli_scanbuff isn't designed to be used
	 * in this way. It gets the "filetype" wrong and then
	 * doesn't scan correctly
	 */
	if(cli_scanbuff((char *)p, len, ctx->virname, ctx->engine, 0) == CL_VIRUS) {
		cli_dbgmsg("save_text: found %s\n", *ctx->virname);
		return CL_VIRUS;
	}

	fb = fileblobCreate();
	if(fb == NULL)
		return CL_EMEM;

	tmpfilename = cli_gentemp(dir);
	if(tmpfilename == NULL) {
		fileblobDestroy(fb);
		return CL_ETMPFILE;
	}

	/* len is a size_t: convert explicitly to match the format */
	cli_dbgmsg("save plain bit to %s, %lu bytes\n",
		tmpfilename, (unsigned long)len);

	fileblobSetFilename(fb, dir, tmpfilename);
	free(tmpfilename);

	/* Best effort: a failed write is deliberately not reported */
	(void)fileblobAddData(fb, (const unsigned char *)p, len);
	fileblobDestroy(fb);

	return CL_SUCCESS;
}
2006-01-05 11:16:27 +00:00
/*
 * Build the global keyword map for the bytes in [begin, end).
 *
 * Every position is compared, ignoring case, against a fixed table of
 * markers; the first marker that matches at a position is recorded with
 * add_to_map(). Any map left over from an earlier call is released first
 * (with a warning).
 */
static void
create_map(const char *begin, const char *end)
{
	static const struct keyword {
		const char *word;
		int len;
	} keywords[] = {
		{ "base64",		6 },
		{ "quoted-printable",	16 },
		{ "\nbegin ",		7 },
		{ "\nFrom ",		6 },
		{ "\n\n",		2 },
		{ "\r\n\r\n",		4 },
		{ NULL,			0 }
	};
	const char *cursor;

	if(map != NULL) {
		cli_warnmsg("create_map called without free_map\n");
		free_map();
	}

	for(cursor = begin; cursor < end; cursor++) {
		const struct keyword *candidate = keywords;

		while(candidate->word != NULL) {
			/* Only compare when the whole marker fits before end */
			if(((end - cursor) >= candidate->len) &&
			   (strncasecmp(cursor, candidate->word, candidate->len) == 0)) {
				add_to_map(cursor, candidate->word);
				break;
			}
			candidate++;
		}
	}
}
/* To sort map, assume 'offset' is presented in sorted order */
static void
add_to_map ( const char * offset , const char * word )
{
if ( map ) {
tail - > next = cli_malloc ( sizeof ( struct map ) ) ; /* FIXME: verify */
tail = tail - > next ;
} else
map = tail = cli_malloc ( sizeof ( struct map ) ) ; /* FIXME: verify */
tail - > offset = offset ;
tail - > word = word ;
tail - > next = NULL ;
}
static const char *
find_in_map ( const char * offset , const char * word )
{
const struct map * item ;
for ( item = map ; item ; item = item - > next )
if ( item - > offset > = offset )
if ( strcasecmp ( word , item - > word ) = = 0 )
return item - > offset ;
return NULL ;
}
static void
free_map ( void )
{
while ( map ) {
struct map * next = map - > next ;
free ( map ) ;
map = next ;
}
map = NULL ;
}
# else /*!NEW_WORLD*/
2004-12-16 15:29:08 +00:00
int
2006-05-03 09:36:40 +00:00
cli_mbox ( const char * dir , int desc , cli_ctx * ctx )
2004-12-16 15:29:08 +00:00
{
2005-03-02 20:08:24 +00:00
if ( dir = = NULL ) {
cli_warnmsg ( " cli_mbox called with NULL dir \n " ) ;
return CL_ENULLARG ;
}
2006-05-03 09:36:40 +00:00
return cli_parse_mbox ( dir , desc , ctx ) ;
2004-12-16 15:29:08 +00:00
}
# endif
2003-07-29 15:48:06 +00:00
/*
* TODO : when signal handling is added , need to remove temp files when a
2004-09-06 11:05:44 +00:00
* signal is received
2003-07-29 15:48:06 +00:00
* TODO : add option to scan in memory not via temp files , perhaps with a
2004-03-30 22:46:44 +00:00
* named pipe or memory mapped file , though this won ' t work on big e - mails
* containing many levels of encapsulated messages - it ' d just take too much
* RAM
2003-08-29 14:27:15 +00:00
* TODO : parse . msg format files
2003-10-12 12:39:49 +00:00
* TODO : fully handle AppleDouble format , see
2004-09-06 11:05:44 +00:00
* http : //www.lazerware.com/formats/Specs/AppleSingle_AppleDouble.pdf
2003-12-20 13:57:26 +00:00
* TODO : ensure parseEmailHeaders is always called before parseEmailBody
* TODO : create parseEmail which calls parseEmailHeaders then parseEmailBody
2005-12-09 17:19:10 +00:00
 * TODO: Handle unexpected NUL bytes in header lines which stop strcmp()s:
* e . g . \ 0 Content - Type : application / binary ;
2003-07-29 15:48:06 +00:00
*/
2004-12-16 15:29:08 +00:00
static int
2006-05-03 09:36:40 +00:00
cli_parse_mbox ( const char * dir , int desc , cli_ctx * ctx )
2003-07-29 15:48:06 +00:00
{
2003-08-29 14:27:15 +00:00
int retcode , i ;
2004-12-18 16:34:31 +00:00
message * body ;
2003-07-29 15:48:06 +00:00
FILE * fd ;
2005-04-28 14:46:44 +00:00
char buffer [ RFC2821LENGTH + 1 ] ;
2006-06-28 21:07:36 +00:00
mbox_ctx mctx ;
2004-09-15 18:11:55 +00:00
# ifdef HAVE_BACKTRACE
2004-06-22 04:08:02 +00:00
void ( * segv ) ( int ) ;
# endif
2004-08-17 08:31:58 +00:00
static table_t * rfc821 , * subtype ;
2005-03-15 18:01:25 +00:00
# ifdef CL_DEBUG
char tmpfilename [ 16 ] ;
int tmpfd ;
# endif
2006-09-21 14:42:06 +00:00
# ifdef FOLLOWURLS
static int initialised = 0 ;
# ifdef CL_THREAD_SAFE
static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER ;
# endif
# endif
2003-07-29 15:48:06 +00:00
2006-01-10 14:55:26 +00:00
# ifdef NEW_WORLD
cli_dbgmsg ( " fall back to old world \n " ) ;
# else
2003-07-29 15:48:06 +00:00
cli_dbgmsg ( " in mbox() \n " ) ;
2006-01-10 14:55:26 +00:00
# endif
2003-07-29 15:48:06 +00:00
2006-09-21 14:42:06 +00:00
# ifdef FOLLOWURLS
if ( ctx - > options & CL_SCAN_MAILURL ) {
# ifdef CL_THREAD_SAFE
pthread_mutex_lock ( & init_mutex ) ;
# endif
if ( ! initialised ) {
if ( curl_global_init ( CURL_GLOBAL_ALL ) ! = 0 ) {
# ifdef CL_THREAD_SAFE
pthread_mutex_unlock ( & init_mutex ) ;
# endif
cli_warnmsg ( " curl_global_init failed, disabling mail-follow-urls " ) ;
ctx - > options & = ~ CL_SCAN_MAILURL ;
}
initialised = 1 ;
}
# ifdef CL_THREAD_SAFE
pthread_mutex_unlock ( & init_mutex ) ;
# endif
}
# endif
2003-08-29 14:27:15 +00:00
i = dup ( desc ) ;
if ( ( fd = fdopen ( i , " rb " ) ) = = NULL ) {
cli_errmsg ( " Can't open descriptor %d \n " , desc ) ;
close ( i ) ;
2004-09-06 11:05:44 +00:00
return CL_EOPEN ;
2003-08-29 14:27:15 +00:00
}
2005-03-15 18:01:25 +00:00
# ifdef CL_DEBUG
/*
* Copy the incoming mail for debugging , so that if it falls over
* we have a copy of the offending email . This is debugging code
* that you shouldn ' t of course install in a live environment . I am
* not interested in hearing about security issues with this section
* of the parser .
*/
strcpy ( tmpfilename , " /tmp/mboxXXXXXX " ) ;
tmpfd = mkstemp ( tmpfilename ) ;
if ( tmpfd < 0 ) {
perror ( tmpfilename ) ;
cli_errmsg ( " Can't make debugging file \n " ) ;
} else {
FILE * tmpfp = fdopen ( tmpfd , " w " ) ;
if ( tmpfp ) {
while ( fgets ( buffer , sizeof ( buffer ) - 1 , fd ) ! = NULL )
fputs ( buffer , tmpfp ) ;
fclose ( tmpfp ) ;
rewind ( fd ) ;
} else
cli_errmsg ( " Can't fdopen debugging file \n " ) ;
}
# endif
2004-11-08 10:30:05 +00:00
if ( fgets ( buffer , sizeof ( buffer ) - 1 , fd ) = = NULL ) {
2003-08-29 14:27:15 +00:00
/* empty message */
fclose ( fd ) ;
2005-03-15 18:01:25 +00:00
# ifdef CL_DEBUG
unlink ( tmpfilename ) ;
# endif
2004-09-06 11:05:44 +00:00
return CL_CLEAN ;
2003-08-29 14:27:15 +00:00
}
2004-06-16 08:07:39 +00:00
# ifdef CL_THREAD_SAFE
pthread_mutex_lock ( & tables_mutex ) ;
# endif
2004-08-17 08:31:58 +00:00
if ( rfc821 = = NULL ) {
assert ( subtype = = NULL ) ;
2004-06-09 18:18:59 +00:00
2004-08-17 08:31:58 +00:00
if ( initialiseTables ( & rfc821 , & subtype ) < 0 ) {
rfc821 = NULL ;
subtype = NULL ;
2004-06-16 08:07:39 +00:00
# ifdef CL_THREAD_SAFE
pthread_mutex_unlock ( & tables_mutex ) ;
# endif
2004-06-09 18:18:59 +00:00
fclose ( fd ) ;
2005-03-15 18:01:25 +00:00
# ifdef CL_DEBUG
unlink ( tmpfilename ) ;
# endif
2004-09-06 11:05:44 +00:00
return CL_EMEM ;
2004-06-09 18:18:59 +00:00
}
2003-07-29 15:48:06 +00:00
}
2004-06-16 08:07:39 +00:00
# ifdef CL_THREAD_SAFE
pthread_mutex_unlock ( & tables_mutex ) ;
# endif
2003-07-29 15:48:06 +00:00
2004-06-30 14:32:28 +00:00
# ifdef HAVE_BACKTRACE
2004-06-22 04:08:02 +00:00
segv = signal ( SIGSEGV , sigsegv ) ;
# endif
2006-05-03 15:41:44 +00:00
retcode = CL_SUCCESS ;
2006-05-04 08:34:02 +00:00
body = NULL ;
2006-06-28 21:07:36 +00:00
mctx . dir = dir ;
mctx . rfc821Table = rfc821 ;
mctx . subtypeTable = subtype ;
mctx . ctx = ctx ;
2003-12-20 13:57:26 +00:00
/*
2005-04-07 16:38:37 +00:00
* Is it a UNIX style mbox with more than one
2003-12-20 13:57:26 +00:00
* mail message , or just a single mail message ?
2005-04-07 16:38:37 +00:00
*
* TODO : It would be better if we called cli_scandir here rather than
* in cli_scanmail . Then we could improve the way mailboxes with more
* than one message is handled , e . g . stopping parsing when an infected
* message is stopped , and giving a better indication of which message
* within the mailbox is infected
2003-12-20 13:57:26 +00:00
*/
2006-05-04 17:44:29 +00:00
/*if((strncmp(buffer, "From ", 5) == 0) && isalnum(buffer[5])) {*/
if ( strncmp ( buffer , " From " , 5 ) = = 0 ) {
2003-07-29 15:48:06 +00:00
/*
2003-08-29 14:27:15 +00:00
* Have been asked to check a UNIX style mbox file , which
* may contain more than one e - mail message to decode
2004-12-19 13:50:08 +00:00
*
* It would be far better for scanners . c to do this splitting
* and do this
* FOR EACH mail in the mailbox
* DO
* pass this mail to cli_mbox - -
* scan this file
* IF this file has a virus quit
* THEN
* return CL_VIRUS
* FI
* END
* This would remove a problem with this code that it can
* fill up the tmp directory before it starts scanning
2003-07-29 15:48:06 +00:00
*/
2004-12-18 16:34:31 +00:00
bool lastLineWasEmpty ;
int messagenumber ;
message * m = messageCreate ( ) ;
if ( m = = NULL ) {
fclose ( fd ) ;
# ifdef HAVE_BACKTRACE
signal ( SIGSEGV , segv ) ;
2005-03-15 18:01:25 +00:00
# endif
# ifdef CL_DEBUG
unlink ( tmpfilename ) ;
2004-12-18 16:34:31 +00:00
# endif
return CL_EMEM ;
}
lastLineWasEmpty = FALSE ;
messagenumber = 1 ;
2006-05-03 15:41:44 +00:00
messageSetCTX ( m , ctx ) ;
2003-07-29 15:48:06 +00:00
2003-08-29 14:27:15 +00:00
do {
2003-12-20 13:57:26 +00:00
cli_chomp ( buffer ) ;
2006-05-04 17:44:29 +00:00
/*if(lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0) && isalnum(buffer[5])) {*/
if ( lastLineWasEmpty & & ( strncmp ( buffer , " From " , 5 ) = = 0 ) ) {
2004-04-14 08:33:57 +00:00
cli_dbgmsg ( " Deal with email number %d \n " , messagenumber + + ) ;
2003-07-29 15:48:06 +00:00
/*
2003-12-20 13:57:26 +00:00
* End of a message in the mail box
2003-07-29 15:48:06 +00:00
*/
2004-08-21 12:01:07 +00:00
body = parseEmailHeaders ( m , rfc821 ) ;
2004-08-11 14:48:13 +00:00
if ( body = = NULL ) {
messageReset ( m ) ;
continue ;
}
2006-05-03 15:41:44 +00:00
messageSetCTX ( body , ctx ) ;
2003-12-20 13:57:26 +00:00
messageDestroy ( m ) ;
2006-05-03 15:41:44 +00:00
if ( messageGetBody ( body ) ) {
2006-06-28 21:07:36 +00:00
int rc = parseEmailBody ( body , NULL , & mctx ) ;
2006-05-03 15:41:44 +00:00
if ( rc = = 0 ) {
2004-03-10 22:07:54 +00:00
messageReset ( body ) ;
m = body ;
continue ;
2006-05-03 15:41:44 +00:00
} else if ( rc = = 3 ) {
cli_dbgmsg ( " Message number %d is infected \n " ,
messagenumber ) ;
retcode = CL_VIRUS ;
2006-07-30 12:39:01 +00:00
m = NULL ;
2006-05-03 15:41:44 +00:00
break ;
2004-03-10 22:07:54 +00:00
}
2006-05-03 15:41:44 +00:00
}
2003-07-29 15:48:06 +00:00
/*
2003-12-20 13:57:26 +00:00
* Starting a new message , throw away all the
2004-12-16 15:29:08 +00:00
* information about the old one . It would
* be best to be able to scan this message
* now , but cli_scanfile needs arguments
* that haven ' t been passed here so it can ' t be
* called
2003-07-29 15:48:06 +00:00
*/
2003-12-20 13:57:26 +00:00
m = body ;
messageReset ( body ) ;
2006-05-03 15:41:44 +00:00
messageSetCTX ( body , ctx ) ;
2003-07-29 15:48:06 +00:00
2003-08-29 14:27:15 +00:00
cli_dbgmsg ( " Finished processing message \n " ) ;
2003-12-20 13:57:26 +00:00
} else
2004-02-11 08:17:50 +00:00
lastLineWasEmpty = ( bool ) ( buffer [ 0 ] = = ' \0 ' ) ;
2005-03-07 11:26:18 +00:00
2005-05-31 18:15:40 +00:00
if ( isuuencodebegin ( buffer ) ) {
2005-03-18 18:12:25 +00:00
/*
2005-03-07 11:26:18 +00:00
* Fast track visa to uudecode .
* TODO : binhex , yenc
*/
2006-01-21 18:37:48 +00:00
if ( uudecodeFile ( m , buffer , dir , fd ) < 0 )
2005-05-31 18:15:40 +00:00
if ( messageAddStr ( m , buffer ) < 0 )
break ;
} else
2005-03-07 11:26:18 +00:00
if ( messageAddStr ( m , buffer ) < 0 )
break ;
2004-11-08 10:30:05 +00:00
} while ( fgets ( buffer , sizeof ( buffer ) - 1 , fd ) ! = NULL ) ;
2004-04-14 08:33:57 +00:00
2004-12-18 16:34:31 +00:00
fclose ( fd ) ;
2006-05-03 15:41:44 +00:00
if ( retcode = = CL_SUCCESS ) {
cli_dbgmsg ( " Extract attachments from email %d \n " , messagenumber ) ;
body = parseEmailHeaders ( m , rfc821 ) ;
}
if ( m )
messageDestroy ( m ) ;
2004-08-11 14:48:13 +00:00
} else {
2003-12-11 14:37:28 +00:00
/*
* It ' s a single message , parse the headers then the body
2004-08-11 14:48:13 +00:00
*/
2004-08-26 09:35:40 +00:00
if ( strncmp ( buffer , " P I " , 4 ) = = 0 )
/*
* CommuniGate Pro format : ignore headers until
* blank line
*/
2004-11-08 10:30:05 +00:00
while ( ( fgets ( buffer , sizeof ( buffer ) - 1 , fd ) ! = NULL ) & &
2004-08-26 09:35:40 +00:00
( strchr ( " \r \n " , buffer [ 0 ] ) = = NULL ) )
;
/*
* Ignore any blank lines at the top of the message
*/
2004-08-11 14:48:13 +00:00
while ( strchr ( " \r \n " , buffer [ 0 ] ) & &
2005-07-16 15:53:29 +00:00
( getline_from_mbox ( buffer , sizeof ( buffer ) - 1 , fd ) ! = NULL ) )
2004-08-12 10:37:53 +00:00
;
2004-11-28 21:05:50 +00:00
buffer [ sizeof ( buffer ) - 1 ] = ' \0 ' ;
2004-11-08 10:30:05 +00:00
2005-03-28 11:06:21 +00:00
body = parseEmailFile ( fd , rfc821 , buffer , dir ) ;
2004-12-18 16:34:31 +00:00
fclose ( fd ) ;
2004-08-11 14:48:13 +00:00
}
2003-12-11 14:37:28 +00:00
2004-08-11 14:48:13 +00:00
if ( body ) {
/*
* Write out the last entry in the mailbox
*/
2006-05-03 15:41:44 +00:00
if ( ( retcode = = CL_SUCCESS ) & & messageGetBody ( body ) ) {
messageSetCTX ( body , ctx ) ;
2006-06-28 21:07:36 +00:00
switch ( parseEmailBody ( body , NULL , & mctx ) ) {
2006-05-03 15:41:44 +00:00
case 0 :
retcode = CL_EFORMAT ;
break ;
case 3 :
retcode = CL_VIRUS ;
break ;
}
}
2003-07-29 15:48:06 +00:00
2004-08-11 14:48:13 +00:00
/*
* Tidy up and quit
*/
messageDestroy ( body ) ;
}
2003-07-29 15:48:06 +00:00
cli_dbgmsg ( " cli_mbox returning %d \n " , retcode ) ;
2004-06-30 14:32:28 +00:00
# ifdef HAVE_BACKTRACE
2004-06-22 04:08:02 +00:00
signal ( SIGSEGV , segv ) ;
# endif
2005-03-15 18:01:25 +00:00
# ifdef CL_DEBUG
unlink ( tmpfilename ) ;
# endif
2003-07-29 15:48:06 +00:00
return retcode ;
}
2003-12-11 14:37:28 +00:00
/*
2004-12-18 16:34:31 +00:00
* Read in an email message from fin , parse it , and return the message
2003-12-11 14:37:28 +00:00
*
2004-12-18 16:34:31 +00:00
* FIXME : files full of new lines and nothing else are
* handled ungracefully . . .
*/
static message *
2005-03-28 11:06:21 +00:00
parseEmailFile ( FILE * fin , const table_t * rfc821 , const char * firstLine , const char * dir )
2004-12-18 16:34:31 +00:00
{
bool inHeader = TRUE ;
2006-05-16 20:16:38 +00:00
bool bodyIsEmpty = TRUE ;
2006-04-07 11:26:34 +00:00
bool lastWasBlank = FALSE , lastBodyLineWasBlank = FALSE ;
2004-12-18 16:34:31 +00:00
message * ret ;
bool anyHeadersFound = FALSE ;
int commandNumber = - 1 ;
2005-03-22 11:28:42 +00:00
char * fullline = NULL , * boundary = NULL ;
2004-12-18 16:34:31 +00:00
size_t fulllinelength = 0 ;
2005-04-28 14:46:44 +00:00
char buffer [ RFC2821LENGTH + 1 ] ;
2004-12-18 16:34:31 +00:00
cli_dbgmsg ( " parseEmailFile \n " ) ;
ret = messageCreate ( ) ;
if ( ret = = NULL )
return NULL ;
strcpy ( buffer , firstLine ) ;
do {
2006-07-12 21:21:25 +00:00
const char * line ;
2004-12-18 16:34:31 +00:00
( void ) cli_chomp ( buffer ) ;
2006-07-12 21:21:25 +00:00
if ( buffer [ 0 ] = = ' \0 ' )
2005-04-19 09:23:12 +00:00
line = NULL ;
2006-07-12 21:21:25 +00:00
else
line = buffer ;
2004-12-18 16:34:31 +00:00
/*
* Don ' t blank lines which are only spaces from headers ,
* otherwise they ' ll be treated as the end of header marker
*/
2005-03-22 11:28:42 +00:00
if ( lastWasBlank ) {
lastWasBlank = FALSE ;
if ( boundaryStart ( buffer , boundary ) ) {
cli_dbgmsg ( " Found a header line with space that should be blank \n " ) ;
inHeader = FALSE ;
}
}
2004-12-18 16:34:31 +00:00
if ( inHeader ) {
2006-07-12 19:22:50 +00:00
cli_dbgmsg ( " parseEmailFile: check '%s' fullline %p \n " ,
buffer ? buffer : " " , fullline ) ;
2005-04-19 09:23:12 +00:00
if ( line & & isspace ( line [ 0 ] ) ) {
2005-03-22 11:28:42 +00:00
char copy [ sizeof ( buffer ) ] ;
strcpy ( copy , buffer ) ;
strstrip ( copy ) ;
if ( copy [ 0 ] = = ' \0 ' ) {
/*
2005-03-22 11:33:26 +00:00
* The header line contains only white
* space . This is not the end of the
* headers according to RFC2822 , but
* some MUAs will handle it as though
* it were , and virus writers exploit
* this bug . We can ' t just break from
* the loop here since that would allow
* other exploits such as inserting a
* white space line before the
* content - type line . So we just have
* to make a best guess . Sigh .
2005-03-22 11:28:42 +00:00
*/
if ( fullline ) {
if ( parseEmailHeader ( ret , fullline , rfc821 ) < 0 )
continue ;
free ( fullline ) ;
fullline = NULL ;
}
2006-07-08 19:58:28 +00:00
if ( boundary | |
( ( boundary = ( char * ) messageFindArgument ( ret , " boundary " ) ) ! = NULL ) ) {
2005-03-22 11:28:42 +00:00
lastWasBlank = TRUE ;
continue ;
}
}
}
2005-04-19 09:23:12 +00:00
if ( ( line = = NULL ) & & ( fullline = = NULL ) ) { /* empty line */
2006-07-12 19:22:50 +00:00
/*
* A blank line signifies the end of
* the header and the start of the text
*/
if ( ! anyHeadersFound )
/* Ignore the junk at the top */
continue ;
2005-01-27 14:12:00 +00:00
2006-07-12 19:22:50 +00:00
cli_dbgmsg ( " End of header information \n " ) ;
inHeader = FALSE ;
bodyIsEmpty = TRUE ;
2004-12-18 16:34:31 +00:00
} else {
char * ptr ;
2005-01-06 23:25:18 +00:00
int lookahead ;
2004-12-18 16:34:31 +00:00
if ( fullline = = NULL ) {
2005-04-28 14:46:44 +00:00
char cmd [ RFC2821LENGTH + 1 ] , out [ RFC2821LENGTH + 1 ] ;
2004-12-18 16:34:31 +00:00
/*
* Continuation of line we ' re ignoring ?
*/
2006-07-12 19:22:50 +00:00
if ( isblank ( line [ 0 ] ) )
2004-12-18 16:34:31 +00:00
continue ;
/*
* Is this a header we ' re interested in ?
*/
2005-04-19 09:23:12 +00:00
if ( ( strchr ( line , ' : ' ) = = NULL ) | |
( cli_strtokbuf ( line , 0 , " : " , cmd ) = = NULL ) ) {
if ( strncmp ( line , " From " , 5 ) = = 0 )
2004-12-18 16:34:31 +00:00
anyHeadersFound = TRUE ;
continue ;
}
2005-02-16 22:20:49 +00:00
ptr = rfc822comments ( cmd , out ) ;
2004-12-18 16:34:31 +00:00
commandNumber = tableFind ( rfc821 , ptr ? ptr : cmd ) ;
switch ( commandNumber ) {
case CONTENT_TRANSFER_ENCODING :
case CONTENT_DISPOSITION :
case CONTENT_TYPE :
anyHeadersFound = TRUE ;
break ;
default :
if ( ! anyHeadersFound )
anyHeadersFound = usefulHeader ( commandNumber , cmd ) ;
continue ;
}
2005-04-19 09:23:12 +00:00
fullline = strdup ( line ) ;
fulllinelength = strlen ( line ) + 1 ;
} else if ( line ! = NULL ) {
fulllinelength + = strlen ( line ) ;
2005-05-13 19:43:37 +00:00
ptr = cli_realloc ( fullline , fulllinelength ) ;
if ( ptr = = NULL )
continue ;
fullline = ptr ;
2005-04-19 09:23:12 +00:00
strcat ( fullline , line ) ;
2004-12-18 16:34:31 +00:00
}
assert ( fullline ! = NULL ) ;
lookahead = getc ( fin ) ;
if ( lookahead ! = EOF ) {
ungetc ( lookahead , fin ) ;
/*
* Section B .2 of RFC822 says TAB or
* SPACE means a continuation of the
* previous entry .
*
* Add all the arguments on the line
*/
2005-12-09 17:19:10 +00:00
if ( isblank ( lookahead ) )
2004-12-18 16:34:31 +00:00
continue ;
}
2006-07-12 21:21:25 +00:00
if ( line & & ( count_quotes ( fullline ) & 1 ) )
continue ;
2004-12-18 16:34:31 +00:00
2005-02-16 22:20:49 +00:00
ptr = rfc822comments ( fullline , NULL ) ;
2004-12-18 16:34:31 +00:00
if ( ptr ) {
free ( fullline ) ;
fullline = ptr ;
}
if ( parseEmailHeader ( ret , fullline , rfc821 ) < 0 )
continue ;
free ( fullline ) ;
fullline = NULL ;
}
2005-05-31 18:15:40 +00:00
} else if ( line & & isuuencodebegin ( line ) ) {
2005-03-28 11:06:21 +00:00
/*
* Fast track visa to uudecode .
* TODO : binhex , yenc
*/
2006-05-16 20:16:38 +00:00
bodyIsEmpty = FALSE ;
2006-01-21 18:37:48 +00:00
if ( uudecodeFile ( ret , line , dir , fin ) < 0 )
2005-05-31 18:15:40 +00:00
if ( messageAddStr ( ret , line ) < 0 )
break ;
2006-04-07 11:26:34 +00:00
} else {
if ( line = = NULL ) {
2006-09-13 21:40:03 +00:00
/*
* Although this would save time and RAM , some
* phish signatures have been built which need
* the blank lines
*/
if ( lastBodyLineWasBlank & &
( messageGetMimeType ( ret ) ! = TEXT ) ) {
2006-04-07 11:26:34 +00:00
cli_dbgmsg ( " Ignoring consecutive blank lines in the body \n " ) ;
continue ;
}
lastBodyLineWasBlank = TRUE ;
2006-05-16 20:16:38 +00:00
} else {
if ( bodyIsEmpty ) {
/*
* Broken message : new line in the
* middle of the headers , so the first
* line of the body is in fact
* the last lines of the header
*/
if ( strncmp ( line , " Message-Id: " , 12 ) = = 0 )
continue ;
if ( strncmp ( line , " Date: " , 6 ) = = 0 )
continue ;
}
bodyIsEmpty = FALSE ;
2006-04-07 11:26:34 +00:00
lastBodyLineWasBlank = FALSE ;
2006-05-16 20:16:38 +00:00
}
2006-04-07 11:26:34 +00:00
2005-04-19 09:23:12 +00:00
if ( messageAddStr ( ret , line ) < 0 )
2004-12-18 16:34:31 +00:00
break ;
2006-04-07 11:26:34 +00:00
}
2005-07-16 15:53:29 +00:00
} while ( getline_from_mbox ( buffer , sizeof ( buffer ) - 1 , fin ) ! = NULL ) ;
2004-12-18 16:34:31 +00:00
2006-07-08 19:58:28 +00:00
if ( boundary )
free ( boundary ) ;
2004-12-18 16:34:31 +00:00
if ( fullline ) {
if ( * fullline ) switch ( commandNumber ) {
case CONTENT_TRANSFER_ENCODING :
case CONTENT_DISPOSITION :
case CONTENT_TYPE :
2006-05-28 09:31:22 +00:00
cli_dbgmsg ( " parseEmailFile: Fullline unparsed '%s' \n " , fullline ) ;
2004-12-18 16:34:31 +00:00
}
free ( fullline ) ;
}
if ( ! anyHeadersFound ) {
/*
* False positive in believing we have an e - mail when we don ' t
*/
messageDestroy ( ret ) ;
cli_dbgmsg ( " parseEmailFile: no headers found, assuming it isn't an email \n " ) ;
return NULL ;
}
messageClean ( ret ) ;
cli_dbgmsg ( " parseEmailFile: return \n " ) ;
return ret ;
}
/*
* The given message contains a raw e - mail .
2003-12-14 18:08:29 +00:00
*
* Returns the message ' s body with the correct arguments set
2004-06-25 13:58:41 +00:00
*
* The downside of this approach is that for a short time we have two copies
* of the message in memory , the upside is that it makes for easier parsing
* of encapsulated messages , and in the long run uses less memory in those
* scenarios
2004-12-18 16:34:31 +00:00
*
* TODO : remove the duplication with parseEmailFile
2003-12-11 14:37:28 +00:00
*/
2003-12-14 18:08:29 +00:00
static message *
2006-07-01 16:21:03 +00:00
parseEmailHeaders ( message * m , const table_t * rfc821 )
2003-12-11 14:37:28 +00:00
{
2003-12-14 18:08:29 +00:00
bool inHeader = TRUE ;
2006-05-28 09:31:22 +00:00
bool bodyIsEmpty = TRUE ;
2004-08-21 12:01:07 +00:00
const text * t ;
2003-12-20 13:57:26 +00:00
message * ret ;
2004-08-11 14:48:13 +00:00
bool anyHeadersFound = FALSE ;
2004-10-16 09:03:43 +00:00
int commandNumber = - 1 ;
2004-11-09 10:10:39 +00:00
char * fullline = NULL ;
2004-11-11 22:18:10 +00:00
size_t fulllinelength = 0 ;
2003-12-20 13:57:26 +00:00
2004-06-22 04:08:02 +00:00
cli_dbgmsg ( " parseEmailHeaders \n " ) ;
2003-12-20 13:57:26 +00:00
if ( m = = NULL )
return NULL ;
ret = messageCreate ( ) ;
2003-12-11 14:37:28 +00:00
2004-08-21 12:01:07 +00:00
for ( t = messageGetBody ( m ) ; t ; t = t - > t_next ) {
const char * buffer ;
2003-12-11 14:37:28 +00:00
2004-08-21 12:01:07 +00:00
if ( t - > t_line )
buffer = lineGetData ( t - > t_line ) ;
else
2004-06-22 04:08:02 +00:00
buffer = NULL ;
2003-12-11 14:37:28 +00:00
2004-09-03 16:02:25 +00:00
if ( inHeader ) {
2005-01-09 21:26:07 +00:00
cli_dbgmsg ( " parseEmailHeaders: check '%s' \n " ,
buffer ? buffer : " " ) ;
2004-12-18 16:34:31 +00:00
if ( buffer = = NULL ) {
2005-01-09 21:26:07 +00:00
/*
* A blank line signifies the end of
* the header and the start of the text
*/
cli_dbgmsg ( " End of header information \n " ) ;
2005-03-06 21:13:16 +00:00
if ( ! anyHeadersFound ) {
cli_dbgmsg ( " Nothing interesting in the header \n " ) ;
break ;
}
2006-05-28 09:31:22 +00:00
inHeader = FALSE ;
bodyIsEmpty = TRUE ;
2004-11-11 22:18:10 +00:00
} else {
2004-11-22 15:19:43 +00:00
char * ptr ;
2004-11-11 22:18:10 +00:00
if ( fullline = = NULL ) {
2005-04-28 14:46:44 +00:00
char cmd [ RFC2821LENGTH + 1 ] ;
2004-11-12 22:22:21 +00:00
/*
* Continuation of line we ' re ignoring ?
*/
2005-12-09 17:19:10 +00:00
if ( isblank ( buffer [ 0 ] ) )
2004-11-12 22:22:21 +00:00
continue ;
/*
* Is this a header we ' re interested in ?
*/
2004-11-22 15:19:43 +00:00
if ( ( strchr ( buffer , ' : ' ) = = NULL ) | |
( cli_strtokbuf ( buffer , 0 , " : " , cmd ) = = NULL ) ) {
if ( strncmp ( buffer , " From " , 5 ) = = 0 )
anyHeadersFound = TRUE ;
2004-11-12 22:22:21 +00:00
continue ;
2004-11-22 15:19:43 +00:00
}
2004-11-12 22:22:21 +00:00
2005-02-16 22:20:49 +00:00
ptr = rfc822comments ( cmd , NULL ) ;
2004-11-27 12:02:36 +00:00
commandNumber = tableFind ( rfc821 , ptr ? ptr : cmd ) ;
if ( ptr )
free ( ptr ) ;
2004-11-12 22:22:21 +00:00
switch ( commandNumber ) {
case CONTENT_TRANSFER_ENCODING :
case CONTENT_DISPOSITION :
case CONTENT_TYPE :
2004-11-22 15:19:43 +00:00
anyHeadersFound = TRUE ;
2004-11-12 22:22:21 +00:00
break ;
default :
2004-12-18 16:34:31 +00:00
if ( ! anyHeadersFound )
anyHeadersFound = usefulHeader ( commandNumber , cmd ) ;
2004-11-12 22:22:21 +00:00
continue ;
}
fullline = strdup ( buffer ) ;
fulllinelength = strlen ( buffer ) + 1 ;
} else if ( buffer ) {
fulllinelength + = strlen ( buffer ) ;
2005-05-13 19:43:37 +00:00
ptr = cli_realloc ( fullline , fulllinelength ) ;
if ( ptr = = NULL )
continue ;
fullline = ptr ;
2004-11-12 22:22:21 +00:00
strcat ( fullline , buffer ) ;
2004-11-11 22:18:10 +00:00
}
2004-10-20 10:39:15 +00:00
2004-12-18 16:34:31 +00:00
assert ( fullline ! = NULL ) ;
2004-11-11 22:18:10 +00:00
2006-07-24 12:14:46 +00:00
if ( next_is_folded_header ( t ) )
/* Add arguments to this line */
continue ;
2004-11-11 22:18:10 +00:00
2006-07-12 21:21:25 +00:00
if ( count_quotes ( fullline ) & 1 )
2004-11-11 22:18:10 +00:00
continue ;
2005-02-16 22:20:49 +00:00
ptr = rfc822comments ( fullline , NULL ) ;
2004-11-11 22:18:10 +00:00
if ( ptr ) {
free ( fullline ) ;
fullline = ptr ;
}
2004-09-28 18:40:12 +00:00
2004-12-18 16:34:31 +00:00
if ( parseEmailHeader ( ret , fullline , rfc821 ) < 0 )
continue ;
2004-09-03 16:02:25 +00:00
2004-12-18 16:34:31 +00:00
free ( fullline ) ;
fullline = NULL ;
2004-08-04 19:00:43 +00:00
}
2006-05-28 09:31:22 +00:00
} else {
if ( bodyIsEmpty ) {
if ( buffer = = NULL )
/* throw away leading blank lines */
continue ;
cli_dbgmsg ( " bodyIsEmpty, check \" %s \" \n " , buffer ) ;
/*
* Broken message : new line in the
* middle of the headers , so the first
* line of the body is in fact
* the last lines of the header
*/
if ( strncmp ( buffer , " Message-Id: " , 12 ) = = 0 )
continue ;
if ( strncmp ( buffer , " Date: " , 6 ) = = 0 )
continue ;
bodyIsEmpty = FALSE ;
}
/*if(t->t_line && isuuencodebegin(t->t_line))
puts ( " FIXME: add fast visa here " ) ; */
2004-03-10 22:07:54 +00:00
/*cli_dbgmsg("Add line to body '%s'\n", buffer);*/
2004-08-21 12:01:07 +00:00
if ( messageAddLine ( ret , t - > t_line ) < 0 )
2004-07-06 09:32:45 +00:00
break ;
2006-05-28 09:31:22 +00:00
}
2004-03-29 09:23:12 +00:00
}
2003-12-14 18:08:29 +00:00
2004-11-09 10:10:39 +00:00
if ( fullline ) {
2004-11-11 22:18:10 +00:00
if ( * fullline ) switch ( commandNumber ) {
case CONTENT_TRANSFER_ENCODING :
case CONTENT_DISPOSITION :
case CONTENT_TYPE :
2005-01-19 17:41:25 +00:00
cli_dbgmsg ( " parseEmailHeaders: Fullline unparsed '%s' \n " , fullline ) ;
2004-11-11 22:18:10 +00:00
}
2004-11-09 10:10:39 +00:00
free ( fullline ) ;
}
2004-08-11 14:48:13 +00:00
if ( ! anyHeadersFound ) {
/*
* False positive in believing we have an e - mail when we don ' t
*/
messageDestroy ( ret ) ;
cli_dbgmsg ( " parseEmailHeaders: no headers found, assuming it isn't an email \n " ) ;
return NULL ;
}
2004-04-05 09:37:13 +00:00
messageClean ( ret ) ;
2004-03-10 22:07:54 +00:00
cli_dbgmsg ( " parseEmailHeaders: return \n " ) ;
2003-12-14 18:08:29 +00:00
return ret ;
2003-12-11 14:37:28 +00:00
}
2004-01-13 10:14:13 +00:00
/*
* Handle a header line of an email message
*/
static int
2004-08-17 08:31:58 +00:00
parseEmailHeader ( message * m , const char * line , const table_t * rfc821 )
2004-01-13 10:14:13 +00:00
{
2004-09-16 14:26:20 +00:00
char * cmd ;
2004-01-13 10:14:13 +00:00
int ret = - 1 ;
# ifdef CL_THREAD_SAFE
char * strptr ;
# endif
2004-09-16 13:01:30 +00:00
const char * separater ;
2004-09-16 18:03:25 +00:00
char * copy , tokenseparater [ 2 ] ;
2004-01-13 10:14:13 +00:00
2004-02-14 19:05:27 +00:00
cli_dbgmsg ( " parseEmailHeader '%s' \n " , line ) ;
2004-09-16 13:01:30 +00:00
/*
* In RFC822 the separater between the key a value is a colon ,
* e . g . Content - Transfer - Encoding : base64
* However some MUA ' s are lapse about this and virus writers exploit
* this hole , so we need to check all known possiblities
*/
for ( separater = " := " ; * separater ; separater + + )
if ( strchr ( line , * separater ) ! = NULL )
break ;
if ( * separater = = ' \0 ' )
2004-03-30 22:46:44 +00:00
return - 1 ;
2004-09-16 18:03:25 +00:00
copy = rfc2047 ( line ) ;
if ( copy = = NULL )
2005-01-09 11:40:01 +00:00
/* an RFC checker would return -1 here */
copy = strdup ( line ) ;
2004-06-28 11:47:16 +00:00
2004-09-16 13:01:30 +00:00
tokenseparater [ 0 ] = * separater ;
tokenseparater [ 1 ] = ' \0 ' ;
2004-09-21 08:16:29 +00:00
# ifdef CL_THREAD_SAFE
2004-09-16 13:01:30 +00:00
cmd = strtok_r ( copy , tokenseparater , & strptr ) ;
2004-09-21 08:16:29 +00:00
# else
cmd = strtok ( copy , tokenseparater ) ;
# endif
2004-01-13 10:14:13 +00:00
2004-09-15 22:11:50 +00:00
if ( cmd & & ( strstrip ( cmd ) > 0 ) ) {
2004-09-21 08:16:29 +00:00
# ifdef CL_THREAD_SAFE
2004-01-13 10:14:13 +00:00
char * arg = strtok_r ( NULL , " " , & strptr ) ;
2004-09-21 08:16:29 +00:00
# else
char * arg = strtok ( NULL , " " ) ;
# endif
2004-01-13 10:14:13 +00:00
if ( arg )
/*
* Found a header such as
* Content - Type : multipart / mixed ;
* set arg to be
* " multipart/mixed " and cmd to
2004-02-23 10:14:50 +00:00
* be " Content-Type "
2004-01-13 10:14:13 +00:00
*/
2004-08-17 08:31:58 +00:00
ret = parseMimeHeader ( m , cmd , rfc821 , arg ) ;
2004-01-13 10:14:13 +00:00
}
2004-09-16 18:03:25 +00:00
free ( copy ) ;
2004-01-13 10:14:13 +00:00
return ret ;
}
2003-07-29 15:48:06 +00:00
/*
* This is a recursive routine .
2005-04-07 16:38:37 +00:00
* FIXME : We are not passed & mrec so we can ' t check against MAX_MAIL_RECURSION
2003-07-29 15:48:06 +00:00
*
2003-12-11 14:37:28 +00:00
* This function parses the body of mainMessage and saves its attachments in dir
*
2003-12-14 18:08:29 +00:00
* mainMessage is the buffer to be parsed , it contains an e - mail ' s body , without
2004-08-04 19:00:43 +00:00
* any headers . First time of calling it ' ll be
* the whole message . Later it ' ll be parts of a multipart message
2003-07-29 15:48:06 +00:00
* textIn is the plain text message being built up so far
*
2003-08-02 22:37:52 +00:00
* Returns :
2003-07-29 15:48:06 +00:00
* 0 for fail
2004-01-09 10:22:28 +00:00
* 1 for success , attachments saved
* 2 for success , attachments not saved
2006-05-03 15:41:44 +00:00
* 3 for virus found
2003-07-29 15:48:06 +00:00
*/
static int /* success or fail */
2006-06-28 21:07:36 +00:00
parseEmailBody ( message * messageIn , text * textIn , mbox_ctx * mctx )
2003-07-29 15:48:06 +00:00
{
2006-07-01 16:21:03 +00:00
int rc = 1 ;
text * aText = textIn ;
message * mainMessage = messageIn ;
2004-08-22 20:20:14 +00:00
fileblob * fb ;
2006-05-03 15:41:44 +00:00
bool infected = FALSE ;
2006-09-13 21:40:03 +00:00
# ifdef CL_EXPERIMENTAL
const int doPhishingScan = ! ( mctx - > ctx - > options & CL_SCAN_NOPHISHING ) ; /* || (mctx->ctx->options&CL_SCAN_PHISHING_GA_TRAIN) || (mctx->ctx->options&CL_SCAN_PHISHING_GA); kept here for the GA MERGE */
# endif
2006-09-20 10:24:17 +00:00
2004-08-22 20:20:14 +00:00
cli_dbgmsg ( " in parseEmailBody \n " ) ;
2003-07-29 15:48:06 +00:00
/* Anything left to be parsed? */
2003-08-02 22:37:52 +00:00
if ( mainMessage & & ( messageGetBody ( mainMessage ) ! = NULL ) ) {
2003-07-29 15:48:06 +00:00
mime_type mimeType ;
2006-07-01 16:21:03 +00:00
int subtype , inhead , htmltextPart , inMimeHead , i ;
2004-10-04 12:21:11 +00:00
const char * mimeSubtype , * boundary ;
char * protocol ;
2003-07-29 15:48:06 +00:00
const text * t_line ;
2003-12-06 04:05:18 +00:00
/*bool isAlternative;*/
2003-07-29 15:48:06 +00:00
message * aMessage ;
2006-07-01 16:21:03 +00:00
int multiparts = 0 ;
message * * messages = NULL ; /* parts of a multipart message */
2003-07-29 15:48:06 +00:00
2003-08-29 14:27:15 +00:00
cli_dbgmsg ( " Parsing mail file \n " ) ;
2003-07-29 15:48:06 +00:00
mimeType = messageGetMimeType ( mainMessage ) ;
mimeSubtype = messageGetMimeSubtype ( mainMessage ) ;
2004-12-04 15:52:53 +00:00
/* pre-process */
2006-06-28 21:07:36 +00:00
subtype = tableFind ( mctx - > subtypeTable , mimeSubtype ) ;
2004-10-01 13:50:47 +00:00
if ( ( mimeType = = TEXT ) & & ( subtype = = PLAIN ) ) {
2003-07-29 15:48:06 +00:00
/*
* This is effectively no encoding , notice that we
* don ' t check that charset is us - ascii
*/
cli_dbgmsg ( " assume no encoding \n " ) ;
mimeType = NOMIME ;
2005-03-28 11:06:21 +00:00
messageSetMimeSubtype ( mainMessage , " " ) ;
2004-12-04 15:52:53 +00:00
} else if ( ( mimeType = = MESSAGE ) & &
( strcasecmp ( mimeSubtype , " rfc822-headers " ) = = 0 ) ) {
/*
* RFC1892 / RFC3462 : section 2 text / rfc822 - headers
* incorrectly sent as message / rfc822 - headers
2004-12-07 09:01:24 +00:00
*
* Parse as text / plain , i . e . no mime
2004-12-04 15:52:53 +00:00
*/
cli_dbgmsg ( " Changing message/rfc822-headers to text/rfc822-headers \n " ) ;
2004-12-07 09:01:24 +00:00
mimeType = NOMIME ;
2005-03-28 11:06:21 +00:00
messageSetMimeSubtype ( mainMessage , " " ) ;
2006-06-28 16:06:07 +00:00
} else
cli_dbgmsg ( " mimeType = %d \n " , mimeType ) ;
2003-08-29 14:27:15 +00:00
2003-07-29 15:48:06 +00:00
switch ( mimeType ) {
case NOMIME :
2005-01-06 11:55:38 +00:00
cli_dbgmsg ( " Not a mime encoded message \n " ) ;
2003-07-29 15:48:06 +00:00
aText = textAddMessage ( aText , mainMessage ) ;
2006-09-13 21:40:03 +00:00
# ifdef CL_EXPERIMENTAL
2006-09-16 08:30:55 +00:00
if ( ! doPhishingScan )
break ;
/*else: fall-through: some phishing mails claim they are text/plain, when they are indeed html*/
2006-09-16 10:38:17 +00:00
# else
break ;
2006-09-13 21:40:03 +00:00
# endif
2003-07-29 15:48:06 +00:00
case TEXT :
2004-12-04 16:03:55 +00:00
/* text/plain has been preprocessed as no encoding */
2006-09-13 21:40:03 +00:00
# ifdef CL_EXPERIMENTAL
2006-09-16 10:38:17 +00:00
if ( ( subtype = = HTML ) | | doPhishingScan ) {
2006-09-13 21:40:03 +00:00
# else
2006-06-28 21:07:36 +00:00
if ( ( mctx - > ctx - > options & CL_SCAN_MAILURL ) & & ( subtype = = HTML ) )
2006-09-13 21:40:03 +00:00
# endif
2006-07-04 08:40:46 +00:00
/*
* It would be better to save and scan the
* file and only checkURLs if it ' s found to be
* clean
*/
2006-09-16 10:38:17 +00:00
checkURLs ( mainMessage , mctx , & rc , ( subtype = = HTML ) ) ;
2006-09-13 21:40:03 +00:00
# ifdef CL_EXPERIMENTAL
2006-09-16 10:38:17 +00:00
/*
* There might be html sent without subtype
* html too , so scan them for phishing
*/
2006-09-16 09:55:25 +00:00
if ( rc = = 3 )
2006-09-16 10:38:17 +00:00
infected = TRUE ;
2006-09-13 21:40:03 +00:00
}
# endif
2003-07-29 15:48:06 +00:00
break ;
case MULTIPART :
2004-12-01 13:16:08 +00:00
cli_dbgmsg ( " Content-type 'multipart' handler \n " ) ;
2003-07-29 15:48:06 +00:00
boundary = messageFindArgument ( mainMessage , " boundary " ) ;
if ( boundary = = NULL ) {
2006-07-24 12:14:46 +00:00
cli_warnmsg ( " Multipart/%s MIME message contains no boundary header \n " ,
mimeSubtype ) ;
2003-11-21 07:28:39 +00:00
/* Broken e-mail message */
mimeType = NOMIME ;
/*
* The break means that we will still
* check if the file contains a uuencoded file
*/
break ;
2003-07-29 15:48:06 +00:00
}
2004-10-24 04:38:09 +00:00
/* Perhaps it should assume mixed? */
2004-06-14 09:08:29 +00:00
if ( mimeSubtype [ 0 ] = = ' \0 ' ) {
cli_warnmsg ( " Multipart has no subtype assuming alternative \n " ) ;
mimeSubtype = " alternative " ;
messageSetMimeSubtype ( mainMessage , " alternative " ) ;
}
2003-07-29 15:48:06 +00:00
/*
* Get to the start of the first message
*/
2004-02-14 19:05:27 +00:00
t_line = messageGetBody ( mainMessage ) ;
if ( t_line = = NULL ) {
cli_warnmsg ( " Multipart MIME message has no body \n " ) ;
free ( ( char * ) boundary ) ;
mimeType = NOMIME ;
break ;
}
do
2004-11-09 19:43:48 +00:00
if ( t_line - > t_line ) {
if ( boundaryStart ( lineGetData ( t_line - > t_line ) , boundary ) )
break ;
/*
2005-04-21 11:13:41 +00:00
* Found a binhex file before
2005-02-18 18:05:31 +00:00
* the first multipart
2004-11-22 15:19:43 +00:00
* TODO : check yEnc
2004-11-09 19:43:48 +00:00
*/
2005-04-21 11:13:41 +00:00
if ( binhexBegin ( mainMessage ) = = t_line ) {
2006-07-04 08:40:46 +00:00
if ( exportBinhexMessage ( mctx - > dir , mainMessage ) ) {
2006-06-28 16:06:07 +00:00
/* virus found */
rc = 3 ;
2006-07-04 08:40:46 +00:00
infected = TRUE ;
2006-06-28 16:06:07 +00:00
break ;
2004-11-22 15:19:43 +00:00
}
2004-11-27 14:17:35 +00:00
} else if ( encodingLine ( mainMessage ) = = t_line - > t_next ) {
/*
* We look for the next line
* since later on we ' ll skip
* over the important line when
* we think it ' s a blank line
* at the top of the message -
* which it would have been in
* an RFC compliant world
*/
cli_dbgmsg ( " Found MIME attachment before the first MIME section \n " ) ;
if ( messageGetEncoding ( mainMessage ) = = NOENCODING )
break ;
2004-11-22 15:19:43 +00:00
}
2004-11-09 19:43:48 +00:00
}
2004-02-14 19:05:27 +00:00
while ( ( t_line = t_line - > t_next ) ! = NULL ) ;
2003-07-29 15:48:06 +00:00
if ( t_line = = NULL ) {
2006-07-04 08:40:46 +00:00
cli_dbgmsg ( " Multipart MIME message contains no boundary lines (%s) \n " ,
boundary ) ;
2003-09-14 19:00:00 +00:00
/*
* Free added by Thomas Lamy
* < Thomas . Lamy @ in - online . net >
*/
free ( ( char * ) boundary ) ;
2003-11-21 07:28:39 +00:00
mimeType = NOMIME ;
/*
* The break means that we will still
2005-04-21 11:13:41 +00:00
* check if the file contains a yEnc / binhex file
2003-11-21 07:28:39 +00:00
*/
break ;
2003-07-29 15:48:06 +00:00
}
/*
* Build up a table of all of the parts of this
* multipart message . Remember , each part may itself
* be a multipart message .
*/
inhead = 1 ;
inMimeHead = 0 ;
2006-07-03 12:09:58 +00:00
/*
* Re - read this variable in case mimeSubtype has changed
*/
subtype = tableFind ( mctx - > subtypeTable , mimeSubtype ) ;
2003-12-14 18:08:29 +00:00
/*
2004-10-04 12:21:11 +00:00
* Parse the mainMessage object and create an array
* of objects called messages , one for each of the
2006-07-03 12:09:58 +00:00
* multiparts that mainMessage contains .
2006-07-03 09:19:15 +00:00
*
2003-12-14 18:08:29 +00:00
* This looks like parseEmailHeaders ( ) - maybe there ' s
* some duplication of code to be cleaned up
2006-07-03 12:09:58 +00:00
*
2006-07-24 12:14:46 +00:00
* We may need to create an array rather than just
2006-07-03 12:09:58 +00:00
* save each part as it is found because not all
* elements will need scanning , and we don ' t yet know
* which of those elements it will be , except in
* the case of mixed , when all parts need to be scanned .
2003-12-14 18:08:29 +00:00
*/
2006-07-03 12:09:58 +00:00
for ( multiparts = 0 ; t_line & & ! infected ; multiparts + + ) {
2004-02-18 10:09:46 +00:00
int lines = 0 ;
2004-07-26 17:02:56 +00:00
message * * m ;
2004-02-18 10:09:46 +00:00
2004-07-26 17:02:56 +00:00
m = cli_realloc ( messages , ( ( multiparts + 1 ) * sizeof ( message * ) ) ) ;
2004-08-04 19:00:43 +00:00
if ( m = = NULL )
2004-07-26 17:02:56 +00:00
break ;
messages = m ;
2004-06-18 10:09:33 +00:00
2003-07-29 15:48:06 +00:00
aMessage = messages [ multiparts ] = messageCreate ( ) ;
2004-06-30 14:32:28 +00:00
if ( aMessage = = NULL ) {
multiparts - - ;
continue ;
}
2006-06-28 21:07:36 +00:00
messageSetCTX ( aMessage , mctx - > ctx ) ;
2003-07-29 15:48:06 +00:00
cli_dbgmsg ( " Now read in part %d \n " , multiparts ) ;
2004-02-03 22:54:59 +00:00
/*
* Ignore blank lines . There shouldn ' t be ANY
* but some viruses insert them
*/
2004-06-22 04:08:02 +00:00
while ( ( t_line = t_line - > t_next ) ! = NULL )
2004-08-21 12:01:07 +00:00
if ( t_line - > t_line & &
/*(cli_chomp(t_line->t_text) > 0))*/
( strlen ( lineGetData ( t_line - > t_line ) ) > 0 ) )
2004-06-23 16:25:08 +00:00
break ;
2004-02-03 22:54:59 +00:00
if ( t_line = = NULL ) {
cli_dbgmsg ( " Empty part \n " ) ;
2004-06-30 19:50:31 +00:00
/*
* Remove this part unless there ' s
2005-04-21 11:13:41 +00:00
* a binhex portion somewhere in
2004-06-30 19:50:31 +00:00
* the complete message that we may
* throw away by mistake if the MIME
* encoding information is incorrect
*/
2006-07-03 12:09:58 +00:00
if ( mainMessage & &
( binhexBegin ( mainMessage ) = = NULL ) ) {
2004-06-30 19:50:31 +00:00
messageDestroy ( aMessage ) ;
- - multiparts ;
}
2004-02-03 22:54:59 +00:00
continue ;
}
do {
2004-08-21 12:01:07 +00:00
const char * line = lineGetData ( t_line - > t_line ) ;
2003-07-29 15:48:06 +00:00
2005-05-31 18:15:40 +00:00
/*cli_dbgmsg("multipart %d: inMimeHead %d inhead %d boundary '%s' line '%s' next '%s'\n",
multiparts , inMimeHead , inhead , boundary , line ,
2004-11-26 21:54:00 +00:00
t_line - > t_next & & t_line - > t_next - > t_line ? lineGetData ( t_line - > t_next - > t_line ) : " (null) " ) ; */
2003-07-29 15:48:06 +00:00
2004-10-31 09:32:05 +00:00
if ( inMimeHead ) { /* continuation line */
2004-06-22 04:08:02 +00:00
if ( line = = NULL ) {
2004-11-26 17:32:42 +00:00
/*inhead =*/ inMimeHead = 0 ;
2004-06-22 04:08:02 +00:00
continue ;
}
2004-04-07 18:19:39 +00:00
/*
* Handle continuation lines
* because the previous line
2004-09-16 11:22:03 +00:00
* ended with a ; or this line
* starts with a white space
2004-04-07 18:19:39 +00:00
*/
2004-09-16 11:22:03 +00:00
cli_dbgmsg ( " Multipart %d: About to add mime Argument '%s' \n " ,
multiparts , line ) ;
2004-04-07 18:19:39 +00:00
/*
* Handle the case when it
* isn ' t really a continuation
* line :
* Content - Type : application / octet - stream ;
* Content - Transfer - Encoding : base64
*/
2006-06-28 21:07:36 +00:00
parseEmailHeader ( aMessage , line , mctx - > rfc821Table ) ;
2004-04-07 18:19:39 +00:00
2003-07-29 15:48:06 +00:00
while ( isspace ( ( int ) * line ) )
line + + ;
if ( * line = = ' \0 ' ) {
inhead = inMimeHead = 0 ;
continue ;
}
2006-07-12 19:22:50 +00:00
inMimeHead = FALSE ;
2003-07-29 15:48:06 +00:00
messageAddArgument ( aMessage , line ) ;
2004-10-31 09:32:05 +00:00
} else if ( inhead ) { /* handling normal headers */
2006-07-12 21:21:25 +00:00
/*int quotes;*/
2004-12-01 13:16:08 +00:00
char * fullline , * ptr ;
2004-11-11 22:18:10 +00:00
2004-06-22 04:08:02 +00:00
if ( line = = NULL ) {
2005-01-31 11:02:43 +00:00
/*
* empty line , should the end of the headers ,
* but some base64 decoders , e . g . uudeview , are broken
* and will handle this type of entry , decoding the
* base64 content . . .
* Content - Type : application / octet - stream ; name = text . zip
* Content - Transfer - Encoding : base64
* Content - Disposition : attachment ; filename = " text.zip "
2005-03-18 18:12:25 +00:00
*
2005-01-31 11:02:43 +00:00
* Content - Disposition : attachment ;
* filename = text . zip
* Content - Type : application / octet - stream ;
* name = text . zip
* Content - Transfer - Encoding : base64
2005-03-18 18:12:25 +00:00
*
2005-01-31 11:02:43 +00:00
* UEsDBAoAAAAAAACgPjJ2RHw676gAAO + oAABEAAAAbWFpbF90ZXh0LWluZm8udHh0ICAgICAgICAg
*/
2006-07-24 12:14:46 +00:00
const text * next = t_line - > t_next ;
2005-01-31 11:02:43 +00:00
if ( next & & next - > t_line ) {
const char * data = lineGetData ( next - > t_line ) ;
2005-02-06 18:25:10 +00:00
if ( ( messageGetEncoding ( aMessage ) = = NOENCODING ) & &
2006-07-24 12:14:46 +00:00
( messageGetMimeType ( aMessage ) = = APPLICATION ) & &
strstr ( data , " base64 " ) ) {
2005-02-16 22:20:49 +00:00
/*
* Handle this nightmare ( note the blank
* line in the header and the incorrect
* content - transfer - encoding header )
*
* Content - Type : application / octet - stream ; name = " zipped_files.EXEX-Spanska: Yes
*
* r - Encoding : base64
* Content - Disposition : attachment ; filename = " zipped_files.EXE "
*/
2006-07-24 12:14:46 +00:00
messageSetEncoding ( aMessage , " base64 " ) ;
cli_dbgmsg ( " Ignoring fake end of headers \n " ) ;
continue ;
}
2005-03-15 18:01:25 +00:00
if ( ( strncmp ( data , " Content " , 7 ) = = 0 ) | |
( strncmp ( data , " filename= " , 9 ) = = 0 ) ) {
2005-01-31 11:02:43 +00:00
cli_dbgmsg ( " Ignoring fake end of headers \n " ) ;
continue ;
}
}
2004-11-26 17:32:42 +00:00
cli_dbgmsg ( " Multipart %d: End of header information \n " ,
multiparts ) ;
2003-07-29 15:48:06 +00:00
inhead = 0 ;
continue ;
}
2004-02-02 09:53:53 +00:00
if ( isspace ( ( int ) * line ) ) {
/*
* The first line is
* continuation line .
* This is tricky
* to handle , but
* all we can do is our
* best
*/
cli_dbgmsg ( " Part %d starts with a continuation line \n " ,
multiparts ) ;
messageAddArgument ( aMessage , line ) ;
/*
* Give it a default
* MIME type since
* that may be the
* missing line
*
* Choose application to
* force a save
*/
if ( messageGetMimeType ( aMessage ) = = NOMIME )
messageSetMimeType ( aMessage , " application " ) ;
continue ;
}
2004-12-01 13:16:08 +00:00
inMimeHead = FALSE ;
2004-08-21 12:01:07 +00:00
2005-04-28 14:46:44 +00:00
assert ( strlen ( line ) < = RFC2821LENGTH ) ;
2004-10-31 09:32:05 +00:00
2005-02-16 22:20:49 +00:00
fullline = rfc822comments ( line , NULL ) ;
2004-12-01 13:16:08 +00:00
if ( fullline = = NULL )
fullline = strdup ( line ) ;
2004-11-11 22:18:10 +00:00
2006-07-12 21:21:25 +00:00
/*quotes = count_quotes(fullline);*/
2004-10-31 09:32:05 +00:00
2004-12-01 13:16:08 +00:00
/*
* Fold next lines to the end of this
* if they start with a white space
* or if this line has an odd number of quotes :
* Content - Type : application / octet - stream ; name = " foo
* "
*/
2006-07-24 12:14:46 +00:00
while ( t_line & & next_is_folded_header ( t_line ) ) {
const char * data ;
t_line = t_line - > t_next ;
data = lineGetData ( t_line - > t_line ) ;
2004-10-31 09:32:05 +00:00
2005-11-23 11:21:45 +00:00
if ( data [ 1 ] = = ' \0 ' ) {
/*
* Broken message : the
* blank line at the end
* of the headers isn ' t blank -
* it contains a space
*/
cli_dbgmsg ( " Multipart %d: headers not terminated by blank line \n " ,
multiparts ) ;
inhead = FALSE ;
break ;
}
2004-12-01 13:16:08 +00:00
ptr = cli_realloc ( fullline ,
strlen ( fullline ) + strlen ( data ) + 1 ) ;
2004-01-09 14:46:59 +00:00
2004-12-01 13:16:08 +00:00
if ( ptr = = NULL )
break ;
2004-09-16 11:22:03 +00:00
2004-12-01 13:16:08 +00:00
fullline = ptr ;
strcat ( fullline , data ) ;
2004-11-11 22:18:10 +00:00
2006-07-12 21:21:25 +00:00
/*quotes = count_quotes(data);*/
2004-09-16 11:22:03 +00:00
}
2006-07-24 12:14:46 +00:00
2004-12-01 13:16:08 +00:00
cli_dbgmsg ( " Multipart %d: About to parse folded header '%s' \n " ,
multiparts , fullline ) ;
2006-06-28 21:07:36 +00:00
parseEmailHeader ( aMessage , fullline , mctx - > rfc821Table ) ;
2004-12-01 13:16:08 +00:00
free ( fullline ) ;
2003-07-29 15:48:06 +00:00
} else if ( endOfMessage ( line , boundary ) ) {
/*
* Some viruses put information
* * after * the end of message ,
* which presumably some broken
* mail clients find , so we
* can ' t assume that this
* is the end of the message
*/
/* t_line = NULL;*/
break ;
2004-11-26 21:54:00 +00:00
} else if ( boundaryStart ( line , boundary ) ) {
inhead = 1 ;
break ;
2004-02-18 10:09:46 +00:00
} else {
2004-08-21 12:01:07 +00:00
if ( messageAddLine ( aMessage , t_line - > t_line ) < 0 )
2004-07-26 17:02:56 +00:00
break ;
2004-02-18 10:09:46 +00:00
lines + + ;
}
2004-02-03 22:54:59 +00:00
} while ( ( t_line = t_line - > t_next ) ! = NULL ) ;
2004-02-18 10:09:46 +00:00
cli_dbgmsg ( " Part %d has %d lines \n " ,
multiparts , lines ) ;
2006-07-03 12:09:58 +00:00
/*
* Only save in the array of messages if some
* decision will be taken on whether to scan .
* If all parts will be scanned then save to
* file straight away
*/
switch ( subtype ) {
case MIXED :
case ALTERNATIVE :
case REPORT :
case DIGEST :
case APPLEDOUBLE :
case KNOWBOT :
case - 1 :
mainMessage = do_multipart ( mainMessage ,
messages , multiparts ,
& rc , mctx , messageIn ,
& aText ) ;
- - multiparts ;
if ( rc = = 3 )
infected = TRUE ;
break ;
default :
messageClean ( aMessage ) ;
}
2003-07-29 15:48:06 +00:00
}
free ( ( char * ) boundary ) ;
2004-10-04 12:21:11 +00:00
/*
2004-10-24 04:38:09 +00:00
* Preprocess . Anything special to be done before
* we handle the multiparts ?
2004-10-04 12:21:11 +00:00
*/
2006-07-03 12:09:58 +00:00
switch ( subtype ) {
2004-10-24 04:38:09 +00:00
case KNOWBOT :
/* TODO */
cli_dbgmsg ( " multipart/knowbot parsed as multipart/mixed for now \n " ) ;
mimeSubtype = " mixed " ;
break ;
2004-11-27 14:41:41 +00:00
case - 1 :
/*
* According to section 7.2 .6 of
* RFC1521 , unrecognised multiparts
* should be treated as multipart / mixed .
*/
2004-11-27 21:56:41 +00:00
cli_dbgmsg ( " Unsupported multipart format `%s', parsed as mixed \n " , mimeSubtype ) ;
2004-11-27 14:41:41 +00:00
mimeSubtype = " mixed " ;
break ;
2004-10-24 04:38:09 +00:00
}
2004-10-04 12:21:11 +00:00
2004-03-31 17:01:18 +00:00
/*
* We ' ve finished message we ' re parsing
*/
if ( mainMessage & & ( mainMessage ! = messageIn ) ) {
messageDestroy ( mainMessage ) ;
mainMessage = NULL ;
2004-01-23 10:38:22 +00:00
}
2003-07-29 15:48:06 +00:00
2006-07-03 12:09:58 +00:00
cli_dbgmsg ( " The message has %d parts \n " , multiparts ) ;
if ( ( ( multiparts = = 0 ) | | infected ) & & ( aText = = NULL ) ) {
2006-07-30 10:10:40 +00:00
if ( messages ) {
for ( i = 0 ; i < multiparts ; i + + )
if ( messages [ i ] )
messageDestroy ( messages [ i ] ) ;
2004-06-18 10:09:33 +00:00
free ( messages ) ;
2006-07-30 10:10:40 +00:00
}
2006-07-03 12:09:58 +00:00
/*
* FIXME : we could return 2 here when we have
* saved stuff earlier
*/
return ( rc = = 3 ) ? 3 : 2 ; /* Nothing to do */
2004-06-18 10:09:33 +00:00
}
2004-03-31 17:01:18 +00:00
2004-08-17 08:31:58 +00:00
cli_dbgmsg ( " Find out the multipart type (%s) \n " , mimeSubtype ) ;
2003-07-29 15:48:06 +00:00
2004-10-04 12:21:11 +00:00
/*
* We now have all the parts of the multipart message
* in the messages array :
* message * messages [ multiparts ]
* Let ' s decide what to do with them all
*/
2006-06-28 21:07:36 +00:00
switch ( tableFind ( mctx - > subtypeTable , mimeSubtype ) ) {
2003-07-29 15:48:06 +00:00
case RELATED :
2003-12-14 18:08:29 +00:00
cli_dbgmsg ( " Multipart related handler \n " ) ;
2003-07-29 15:48:06 +00:00
/*
2003-11-04 08:28:47 +00:00
* Have a look to see if there ' s HTML code
* which will need scanning
2003-07-29 15:48:06 +00:00
*/
aMessage = NULL ;
assert ( multiparts > 0 ) ;
2003-08-02 22:37:52 +00:00
htmltextPart = getTextPart ( messages , multiparts ) ;
2003-07-29 15:48:06 +00:00
2003-08-02 22:37:52 +00:00
if ( htmltextPart > = 0 )
aText = textAddMessage ( aText , messages [ htmltextPart ] ) ;
2003-07-29 15:48:06 +00:00
else
/*
2003-11-04 08:28:47 +00:00
* There isn ' t an HTML bit . If there ' s a
* multipart bit , it ' ll may be in there
* somewhere
2003-07-29 15:48:06 +00:00
*/
for ( i = 0 ; i < multiparts ; i + + )
if ( messageGetMimeType ( messages [ i ] ) = = MULTIPART ) {
aMessage = messages [ i ] ;
2003-08-02 22:37:52 +00:00
htmltextPart = i ;
2003-07-29 15:48:06 +00:00
break ;
}
2004-09-20 17:09:25 +00:00
if ( htmltextPart = = - 1 )
2006-05-27 14:35:14 +00:00
cli_dbgmsg ( " No HTML code found to be scanned \n " ) ;
2004-09-20 17:09:25 +00:00
else {
2006-06-28 21:07:36 +00:00
rc = parseEmailBody ( aMessage , aText , mctx ) ;
2004-09-20 17:09:25 +00:00
if ( rc = = 1 ) {
assert ( aMessage = = messages [ htmltextPart ] ) ;
messageDestroy ( aMessage ) ;
messages [ htmltextPart ] = NULL ;
}
}
2003-07-29 15:48:06 +00:00
/*
* Fixed based on an idea from Stephen White < stephen @ earth . li >
* The message is confused about the difference
* between alternative and related . Badtrans . B
* suffers from this problem .
*
* Fall through in this case :
* Content - Type : multipart / related ;
* type = " multipart/alternative "
*/
2003-12-06 04:05:18 +00:00
/*
* Changed to always fall through based on
* an idea from Michael Dankov < misha @ btrc . ru >
* that some viruses are completely confused
* about the difference between related
* and mixed
*/
/*cptr = messageFindArgument(mainMessage, "type");
2003-07-29 15:48:06 +00:00
if ( cptr = = NULL )
break ;
isAlternative = ( bool ) ( strcasecmp ( cptr , " multipart/alternative " ) = = 0 ) ;
free ( ( char * ) cptr ) ;
if ( ! isAlternative )
2003-12-06 04:05:18 +00:00
break ; */
2004-09-23 08:47:10 +00:00
case DIGEST :
/*
* According to section 5.1 .5 RFC2046 , the
* default mime type of multipart / digest parts
* is message / rfc822
*
* We consider them as alternative , wrong in
* the strictest sense since they aren ' t
* alternatives - all parts a valid - but it ' s
* OK for our needs since it means each part
* will be scanned
*/
2003-07-29 15:48:06 +00:00
case ALTERNATIVE :
cli_dbgmsg ( " Multipart alternative handler \n " ) ;
/*
* Fall through - some clients are broken and
* say alternative instead of mixed . The Klez
2004-11-27 14:41:41 +00:00
* virus is broken that way , and anyway we
* wish to scan all of the alternatives
2003-07-29 15:48:06 +00:00
*/
case REPORT :
/*
* According to section 1 of RFC1892 , the
* syntax of multipart / report is the same
* as multipart / mixed . There are some required
* parameters , but there ' s no need for us to
* verify that they exist
*/
case MIXED :
2003-10-12 12:39:49 +00:00
case APPLEDOUBLE : /* not really supported */
2003-07-29 15:48:06 +00:00
/*
* Look for attachments
*
* Not all formats are supported . If an
* unsupported format turns out to be
* common enough to implement , it is a simple
* matter to add it
*/
2004-01-23 10:38:22 +00:00
if ( aText ) {
if ( mainMessage & & ( mainMessage ! = messageIn ) )
messageDestroy ( mainMessage ) ;
2003-07-29 15:48:06 +00:00
mainMessage = NULL ;
2004-01-23 10:38:22 +00:00
}
2003-07-29 15:48:06 +00:00
cli_dbgmsg ( " Mixed message with %d parts \n " , multiparts ) ;
for ( i = 0 ; i < multiparts ; i + + ) {
2006-06-28 21:07:36 +00:00
mainMessage = do_multipart ( mainMessage ,
messages , i , & rc , mctx ,
messageIn , & aText ) ;
if ( rc = = 3 ) {
2006-05-03 15:41:44 +00:00
infected = TRUE ;
break ;
}
2003-07-29 15:48:06 +00:00
}
2006-06-28 21:07:36 +00:00
/* rc = parseEmailBody(NULL, NULL, mctx); */
2003-07-29 15:48:06 +00:00
break ;
case SIGNED :
case PARALLEL :
/*
* If we ' re here it could be because we have a
* multipart / mixed message , consisting of a
* message followed by an attachment . That
* message itself is a multipart / alternative
* message and we need to dig out the plain
* text part of that alternative
*/
2003-08-02 22:37:52 +00:00
htmltextPart = getTextPart ( messages , multiparts ) ;
if ( htmltextPart = = - 1 )
htmltextPart = 0 ;
2003-07-29 15:48:06 +00:00
2006-06-28 21:07:36 +00:00
rc = parseEmailBody ( messages [ htmltextPart ] , aText , mctx ) ;
2004-10-04 12:21:11 +00:00
break ;
case ENCRYPTED :
rc = 0 ;
2004-10-24 04:38:09 +00:00
protocol = ( char * ) messageFindArgument ( mainMessage , " protocol " ) ;
2004-10-04 12:21:11 +00:00
if ( protocol ) {
if ( strcasecmp ( protocol , " application/pgp-encrypted " ) = = 0 ) {
/* RFC2015 */
cli_warnmsg ( " PGP encoded attachment not scanned \n " ) ;
rc = 2 ;
} else
2005-05-11 15:24:33 +00:00
cli_warnmsg ( " Unknown encryption protocol '%s' - if you believe this file contains a virus, submit it to www.clamav.net \n " , protocol ) ;
2004-10-04 12:21:11 +00:00
free ( protocol ) ;
} else
2004-12-19 23:19:54 +00:00
cli_dbgmsg ( " Encryption method missing protocol name \n " ) ;
2004-10-04 12:21:11 +00:00
2003-07-29 15:48:06 +00:00
break ;
default :
2004-11-27 14:41:41 +00:00
assert ( 0 ) ;
2003-07-29 15:48:06 +00:00
}
2004-01-23 10:38:22 +00:00
if ( mainMessage & & ( mainMessage ! = messageIn ) )
messageDestroy ( mainMessage ) ;
2004-11-19 11:32:16 +00:00
if ( aText & & ( textIn = = NULL ) ) {
2006-05-03 15:41:44 +00:00
if ( ( ! infected ) & & ( fb = fileblobCreate ( ) ) ! = NULL ) {
2004-12-01 13:16:08 +00:00
cli_dbgmsg ( " Save non mime and/or text/plain part \n " ) ;
2006-06-28 21:07:36 +00:00
fileblobSetFilename ( fb , mctx - > dir , " textpart " ) ;
2004-11-28 16:27:28 +00:00
/*fileblobAddData(fb, "Received: by clamd (textpart)\n", 30);*/
2006-06-28 21:07:36 +00:00
fileblobSetCTX ( fb , mctx - > ctx ) ;
2006-07-03 09:19:15 +00:00
( void ) textToFileblob ( aText , fb , 1 ) ;
2004-11-19 11:32:16 +00:00
fileblobDestroy ( fb ) ;
}
2003-08-29 14:27:15 +00:00
textDestroy ( aText ) ;
2004-11-19 11:32:16 +00:00
}
2003-08-29 14:27:15 +00:00
2004-11-22 15:19:43 +00:00
for ( i = 0 ; i < multiparts ; i + + )
if ( messages [ i ] )
messageDestroy ( messages [ i ] ) ;
2004-06-18 10:09:33 +00:00
if ( messages )
free ( messages ) ;
2003-07-29 15:48:06 +00:00
return rc ;
case MESSAGE :
/*
* Check for forbidden encodings
*/
switch ( messageGetEncoding ( mainMessage ) ) {
case NOENCODING :
case EIGHTBIT :
case BINARY :
break ;
default :
2003-08-29 14:27:15 +00:00
cli_warnmsg ( " MIME type 'message' cannot be decoded \n " ) ;
2003-07-29 15:48:06 +00:00
break ;
}
2004-10-05 15:48:47 +00:00
rc = 0 ;
2003-08-29 14:27:15 +00:00
if ( ( strcasecmp ( mimeSubtype , " rfc822 " ) = = 0 ) | |
( strcasecmp ( mimeSubtype , " delivery-status " ) = = 0 ) ) {
2006-06-28 21:07:36 +00:00
message * m = parseEmailHeaders ( mainMessage , mctx - > rfc821Table ) ;
2004-03-17 19:48:33 +00:00
if ( m ) {
2006-05-27 14:35:13 +00:00
cli_dbgmsg ( " Decode rfc822 \n " ) ;
2004-03-17 19:48:33 +00:00
2006-06-28 21:07:36 +00:00
messageSetCTX ( m , mctx - > ctx ) ;
2006-05-03 15:41:44 +00:00
2004-04-05 09:37:13 +00:00
if ( mainMessage & & ( mainMessage ! = messageIn ) ) {
messageDestroy ( mainMessage ) ;
mainMessage = NULL ;
2004-09-20 17:09:25 +00:00
} else
messageReset ( mainMessage ) ;
2004-03-17 19:48:33 +00:00
if ( messageGetBody ( m ) )
2006-06-28 21:07:36 +00:00
rc = parseEmailBody ( m , NULL , mctx ) ;
2004-03-17 19:48:33 +00:00
messageDestroy ( m ) ;
}
2003-07-29 15:48:06 +00:00
break ;
2004-10-06 17:21:30 +00:00
} else if ( strcasecmp ( mimeSubtype , " disposition-notification " ) = = 0 ) {
2004-09-22 15:53:45 +00:00
/* RFC 2298 - handle like a normal email */
2004-10-06 17:21:30 +00:00
rc = 1 ;
2004-09-22 15:53:45 +00:00
break ;
2004-10-06 17:21:30 +00:00
} else if ( strcasecmp ( mimeSubtype , " partial " ) = = 0 ) {
2004-10-05 15:48:47 +00:00
# ifdef PARTIAL_DIR
/* RFC1341 message split over many emails */
2006-06-28 21:07:36 +00:00
if ( rfc1341 ( mainMessage , mctx - > dir ) > = 0 )
2004-10-05 15:48:47 +00:00
rc = 1 ;
# else
2004-10-01 07:58:27 +00:00
cli_warnmsg ( " Partial message received from MUA/MTA - message cannot be scanned \n " ) ;
2004-10-05 15:48:47 +00:00
rc = 0 ;
# endif
} else if ( strcasecmp ( mimeSubtype , " external-body " ) = = 0 )
2004-10-01 07:58:27 +00:00
/* TODO */
2003-07-29 15:48:06 +00:00
cli_warnmsg ( " Attempt to send Content-type message/external-body trapped " ) ;
2003-09-14 19:00:00 +00:00
else
2005-05-11 15:24:33 +00:00
cli_warnmsg ( " Unsupported message format `%s' - if you believe this file contains a virus, submit it to www.clamav.net \n " , mimeSubtype ) ;
2003-07-29 15:48:06 +00:00
2004-10-05 15:48:47 +00:00
2004-01-23 10:38:22 +00:00
if ( mainMessage & & ( mainMessage ! = messageIn ) )
messageDestroy ( mainMessage ) ;
2004-06-18 10:09:33 +00:00
if ( messages )
free ( messages ) ;
2004-10-05 15:48:47 +00:00
return rc ;
2003-07-29 15:48:06 +00:00
case APPLICATION :
2004-12-19 23:19:54 +00:00
/*cptr = messageGetMimeSubtype(mainMessage);
2003-08-02 22:37:52 +00:00
2004-12-19 23:19:54 +00:00
if ( ( strcasecmp ( cptr , " octet-stream " ) = = 0 ) | |
2003-11-06 05:07:46 +00:00
( strcasecmp ( cptr , " x-msdownload " ) = = 0 ) ) { */
{
2006-07-01 16:21:03 +00:00
fb = messageToFileblob ( mainMessage , mctx - > dir , 1 ) ;
2003-07-29 15:48:06 +00:00
2004-08-22 10:37:32 +00:00
if ( fb ) {
cli_dbgmsg ( " Saving main message as attachment \n " ) ;
fileblobDestroy ( fb ) ;
2005-01-07 13:51:49 +00:00
if ( mainMessage ! = messageIn ) {
messageDestroy ( mainMessage ) ;
mainMessage = NULL ;
} else
messageReset ( mainMessage ) ;
2003-07-29 15:48:06 +00:00
}
2003-11-06 05:07:46 +00:00
} /*else
cli_warnmsg ( " Discarded application not sent as attachment \n " ) ; */
2003-07-29 15:48:06 +00:00
break ;
case AUDIO :
case VIDEO :
case IMAGE :
break ;
default :
cli_warnmsg ( " Message received with unknown mime encoding " ) ;
break ;
}
2006-07-01 16:21:03 +00:00
2006-07-30 10:10:40 +00:00
if ( messages ) {
/* "can't happen" */
cli_warnmsg ( " messages != NULL, report to bugs@clamav.net \n " ) ;
2006-07-01 16:21:03 +00:00
free ( messages ) ;
2006-07-30 10:10:40 +00:00
}
2003-07-29 15:48:06 +00:00
}
2004-08-04 19:00:43 +00:00
if ( aText & & ( textIn = = NULL ) ) {
2005-01-06 11:55:38 +00:00
/* Look for a bounce in the text (non mime encoded) portion */
const text * t ;
2004-11-22 15:19:43 +00:00
2005-01-06 11:55:38 +00:00
for ( t = aText ; t ; t = t - > t_next ) {
const line_t * l = t - > t_line ;
2005-02-18 18:05:31 +00:00
const text * lookahead , * topofbounce ;
2005-01-06 11:55:38 +00:00
const char * s ;
2005-02-18 18:05:31 +00:00
bool inheader ;
2004-11-22 15:19:43 +00:00
2005-01-06 11:55:38 +00:00
if ( l = = NULL )
continue ;
2006-05-03 15:41:44 +00:00
if ( ! isBounceStart ( lineGetData ( l ) ) )
2005-01-06 11:55:38 +00:00
continue ;
/*
* We ' ve found what looks like the start of a bounce
* message . Only bother saving if it really is a bounce
* message , this helps to speed up scanning of ping - pong
* messages that have lots of bounces within bounces in
* them
*/
for ( lookahead = t - > t_next ; lookahead ; lookahead = lookahead - > t_next ) {
l = lookahead - > t_line ;
if ( l = = NULL )
break ;
s = lineGetData ( l ) ;
if ( strncasecmp ( s , " Content-Type: " , 13 ) = = 0 )
/*
* Don ' t bother with plain / text or
* plain / html
*/
if ( strstr ( s , " text/ " ) = = NULL )
2005-02-17 19:06:32 +00:00
/*
* Don ' t bother to save the unuseful
* part
*/
2005-01-06 11:55:38 +00:00
break ;
}
if ( lookahead & & ( lookahead - > t_line = = NULL ) ) {
cli_dbgmsg ( " Non mime part bounce message is not mime encoded, so it will not be scanned \n " ) ;
t = lookahead ;
/* look for next bounce message */
continue ;
}
2005-02-17 19:06:32 +00:00
/*
* Prescan the bounce message to see if there ' s likely
* to be anything nasty .
* This algorithm is hand crafted and may be breakable
* so all submissions are welcome . It ' s best NOT to
* remove this however you may be tempted , because it
* significantly speeds up the scanning of multiple
* bounces ( i . e . bounces within many bounces )
*/
for ( ; lookahead ; lookahead = lookahead - > t_next ) {
l = lookahead - > t_line ;
if ( l ) {
s = lineGetData ( l ) ;
if ( ( strncasecmp ( s , " Content-Type: " , 13 ) = = 0 ) & &
( strstr ( s , " multipart/ " ) = = NULL ) & &
( strstr ( s , " message/rfc822 " ) = = NULL ) & &
( strstr ( s , " text/plain " ) = = NULL ) )
break ;
}
}
if ( lookahead = = NULL ) {
2005-03-02 20:08:24 +00:00
cli_dbgmsg ( " cli_mbox: I believe it's plain text which must be clean \n " ) ;
2005-02-17 19:06:32 +00:00
/* nothing here, move along please */
break ;
}
2005-02-18 18:05:31 +00:00
if ( ( fb = fileblobCreate ( ) ) = = NULL )
break ;
cli_dbgmsg ( " Save non mime part bounce message \n " ) ;
2006-06-28 21:07:36 +00:00
fileblobSetFilename ( fb , mctx - > dir , " bounce " ) ;
2005-02-18 18:05:31 +00:00
fileblobAddData ( fb , ( unsigned char * ) " Received: by clamd (bounce) \n " , 28 ) ;
2006-06-28 21:07:36 +00:00
fileblobSetCTX ( fb , mctx - > ctx ) ;
2005-02-18 18:05:31 +00:00
inheader = TRUE ;
topofbounce = NULL ;
2006-05-04 10:37:03 +00:00
do {
2005-02-18 18:05:31 +00:00
l = t - > t_line ;
if ( l = = NULL ) {
if ( inheader ) {
inheader = FALSE ;
topofbounce = t ;
}
} else {
s = lineGetData ( l ) ;
fileblobAddData ( fb , ( unsigned char * ) s , strlen ( s ) ) ;
}
fileblobAddData ( fb , ( unsigned char * ) " \n " , 1 ) ;
lookahead = t - > t_next ;
if ( lookahead = = NULL )
break ;
t = lookahead ;
l = t - > t_line ;
if ( ( ! inheader ) & & l ) {
s = lineGetData ( l ) ;
2006-05-03 15:41:44 +00:00
if ( isBounceStart ( s ) ) {
2006-05-02 15:21:59 +00:00
cli_dbgmsg ( " Found the start of another bounce candidate (%s) \n " , s ) ;
2005-02-18 18:05:31 +00:00
break ;
}
}
2006-05-04 10:37:03 +00:00
} while ( ! fileblobContainsVirus ( fb ) ) ;
2005-02-18 18:05:31 +00:00
fileblobDestroy ( fb ) ;
if ( topofbounce )
t = topofbounce ;
/*
* Don ' t do this - it slows bugs . txt
*/
/*if(mainMessage)
mainMessage - > bounce = NULL ; */
2005-01-06 11:55:38 +00:00
}
2004-08-04 19:00:43 +00:00
textDestroy ( aText ) ;
aText = NULL ;
}
2004-08-22 20:20:14 +00:00
/*
* No attachments - scan the text portions , often files
* are hidden in HTML code
*/
2006-06-28 16:06:07 +00:00
if ( mainMessage & & ( rc ! = 3 ) ) {
2006-07-04 08:40:46 +00:00
text * t_line ;
2003-07-29 15:48:06 +00:00
/*
2004-08-22 20:20:14 +00:00
* Look for uu - encoded main file
2003-07-29 15:48:06 +00:00
*/
2005-04-21 11:13:41 +00:00
if ( ( encodingLine ( mainMessage ) ! = NULL ) & &
2006-07-04 08:40:46 +00:00
( ( t_line = bounceBegin ( mainMessage ) ) ! = NULL ) ) {
if ( exportBounceMessage ( t_line , mctx ) )
rc = 1 ;
2004-08-22 20:20:14 +00:00
} else {
bool saveIt ;
2003-08-02 22:37:52 +00:00
2004-08-22 20:20:14 +00:00
if ( messageGetMimeType ( mainMessage ) = = MESSAGE )
2003-08-06 21:22:15 +00:00
/*
2004-08-22 20:20:14 +00:00
* Quick peek , if the encapsulated
* message has no
* content encoding statement don ' t
* bother saving to scan , it ' s safe
2003-08-06 21:22:15 +00:00
*/
2006-07-04 08:40:46 +00:00
saveIt = ( bool ) ( encodingLine ( mainMessage ) ! = NULL ) ;
2004-08-22 20:20:14 +00:00
else if ( ( t_line = encodingLine ( mainMessage ) ) ! = NULL ) {
2004-05-10 11:35:11 +00:00
/*
2004-08-22 20:20:14 +00:00
* Some bounces include the message
* body without the headers .
2005-02-18 18:05:31 +00:00
* FIXME : Unfortunately this generates a
2004-08-22 20:20:14 +00:00
* lot of false positives that a bounce
* has been found when it hasn ' t .
2004-05-10 11:35:11 +00:00
*/
2004-08-22 20:20:14 +00:00
if ( ( fb = fileblobCreate ( ) ) ! = NULL ) {
2005-03-28 11:06:21 +00:00
cli_dbgmsg ( " Found a bounce message with no header at '%s' \n " ,
lineGetData ( t_line - > t_line ) ) ;
2006-06-28 21:07:36 +00:00
fileblobSetFilename ( fb , mctx - > dir , " bounce " ) ;
2004-12-16 15:29:08 +00:00
fileblobAddData ( fb ,
( const unsigned char * ) " Received: by clamd (bounce) \n " ,
28 ) ;
2004-01-28 10:16:51 +00:00
2006-05-04 10:37:03 +00:00
/*fileblobSetCTX(fb, ctx);*/
2006-07-01 21:04:47 +00:00
fb = textToFileblob ( t_line , fb , 1 ) ;
2004-03-18 21:52:51 +00:00
2004-08-22 20:20:14 +00:00
fileblobDestroy ( fb ) ;
2004-03-18 21:52:51 +00:00
}
2004-08-22 20:20:14 +00:00
saveIt = FALSE ;
2006-07-01 16:21:03 +00:00
} else
2004-08-22 20:20:14 +00:00
/*
* Save the entire text portion ,
* since it may be an HTML file with
2006-06-28 16:06:07 +00:00
* a JavaScript virus or a phish
2004-08-22 20:20:14 +00:00
*/
saveIt = TRUE ;
2003-07-29 15:48:06 +00:00
2004-08-22 20:20:14 +00:00
if ( saveIt ) {
cli_dbgmsg ( " Saving text part to scan \n " ) ;
2006-07-03 09:19:15 +00:00
saveTextPart ( mainMessage , mctx - > dir , 1 ) ;
2004-09-20 17:09:25 +00:00
if ( mainMessage ! = messageIn ) {
messageDestroy ( mainMessage ) ;
mainMessage = NULL ;
} else
messageReset ( mainMessage ) ;
rc = 1 ;
2003-08-29 14:27:15 +00:00
}
2003-07-29 15:48:06 +00:00
}
2004-08-22 20:20:14 +00:00
} else
2006-07-01 16:21:03 +00:00
rc = 2 ; /* nothing saved */
2003-07-29 15:48:06 +00:00
2004-01-23 10:38:22 +00:00
if ( mainMessage & & ( mainMessage ! = messageIn ) )
messageDestroy ( mainMessage ) ;
2006-06-28 16:06:07 +00:00
if ( ( rc ! = 0 ) & & infected )
rc = 3 ;
2006-05-03 15:41:44 +00:00
2003-12-14 18:08:29 +00:00
cli_dbgmsg ( " parseEmailBody() returning %d \n " , rc ) ;
2003-07-29 15:48:06 +00:00
2003-12-14 18:08:29 +00:00
return rc ;
2003-07-29 15:48:06 +00:00
}
/*
* Is the current line the start of a new section ?
*
* New sections start with - - boundary
*/
static int
boundaryStart ( const char * line , const char * boundary )
{
2005-02-16 22:20:49 +00:00
char * ptr , * out ;
2004-11-15 13:58:50 +00:00
int rc ;
2005-04-28 14:46:44 +00:00
char buf [ RFC2821LENGTH + 1 ] ;
2004-11-11 22:18:10 +00:00
2004-07-06 09:32:45 +00:00
if ( line = = NULL )
return 0 ; /* empty line */
2006-07-08 19:58:28 +00:00
if ( boundary = = NULL )
return 0 ;
2004-07-06 09:32:45 +00:00
2004-11-19 11:32:16 +00:00
/*cli_dbgmsg("boundaryStart: line = '%s' boundary = '%s'\n", line, boundary);*/
2004-11-11 22:18:10 +00:00
2005-02-18 18:05:31 +00:00
if ( ( * line ! = ' - ' ) & & ( * line ! = ' ( ' ) )
return 0 ;
if ( strchr ( line , ' - ' ) = = NULL )
return 0 ;
2005-02-16 22:20:49 +00:00
if ( strlen ( line ) < = sizeof ( buf ) ) {
out = NULL ;
ptr = rfc822comments ( line , buf ) ;
} else
out = ptr = rfc822comments ( line , NULL ) ;
2004-11-11 22:18:10 +00:00
if ( ptr = = NULL )
2004-11-26 12:05:17 +00:00
ptr = ( char * ) line ;
2004-11-11 22:18:10 +00:00
if ( * ptr + + ! = ' - ' ) {
2005-02-16 22:20:49 +00:00
if ( out )
free ( out ) ;
2004-07-06 09:32:45 +00:00
return 0 ;
2004-11-11 22:18:10 +00:00
}
2004-07-06 09:32:45 +00:00
2003-07-29 15:48:06 +00:00
/*
2004-07-06 09:32:45 +00:00
* Gibe . B3 is broken , it has :
2003-07-29 15:48:06 +00:00
* boundary = " ---- =_NextPart_000_01C31177.9DC7C000 "
* but it ' s boundaries look like
* - - - - - - = _NextPart_000_01C31177 .9 DC7C000
2004-07-06 09:32:45 +00:00
* notice the one too few ' - ' .
* Presumably this is a deliberate exploitation of a bug in some mail
* clients .
*
* The trouble is that this creates a lot of false positives for
* boundary conditions , if we ' re too lax about matches . We do our level
* best to avoid these false positives . For example if we have
* boundary = " 1 " we want to ensure that we don ' t break out of every line
* that has - 1 in it instead of starting - - 1. This needs some more work .
2005-03-02 20:08:24 +00:00
*
* Look with and without RFC822 comments stripped , I ' ve seen some
* samples where ( ) are taken as comments in boundaries and some where
* they ' re not . Irrespective of whatever RFC2822 says we need to find
* viruses in both types of mails
2003-07-29 15:48:06 +00:00
*/
2005-03-02 20:08:24 +00:00
if ( ( strstr ( ptr , boundary ) ! = NULL ) | | ( strstr ( line , boundary ) ! = NULL ) )
2004-11-15 13:58:50 +00:00
rc = 1 ;
else if ( * ptr + + ! = ' - ' )
rc = 0 ;
else
2005-02-06 09:21:55 +00:00
rc = ( strcasecmp ( ptr , boundary ) = = 0 ) ;
2004-11-15 13:58:50 +00:00
2005-02-16 22:20:49 +00:00
if ( out )
free ( out ) ;
2004-11-15 13:58:50 +00:00
if ( rc = = 1 )
cli_dbgmsg ( " boundaryStart: found %s in %s \n " , boundary , line ) ;
return rc ;
2003-07-29 15:48:06 +00:00
}
/*
 * Is the current line the end?
 *
 * The message ends with --boundary--
 *
 * line:	the line to examine, may be NULL
 * boundary:	the multipart boundary string, may be NULL
 *
 * The boundary is compared without regard to case. Anything following the
 * closing "--" is ignored, because some broken mails have white space after
 * the boundary.
 *
 * Returns 1 if the line is the closing boundary, 0 otherwise (including when
 * either argument is NULL).
 */
static int
endOfMessage(const char *line, const char *boundary)
{
	size_t len;

	if((line == NULL) || (boundary == NULL))
		return 0;

	/* line[1] is only inspected once line[0] is known not to be the NUL */
	if((line[0] != '-') || (line[1] != '-'))
		return 0;
	line += 2;

	len = strlen(boundary);
	if(strncasecmp(line, boundary, len) != 0)
		return 0;

	/*
	 * The comparison above matched len non-NUL characters, so line[len]
	 * is within the string (it may be the terminator), and line[len + 1]
	 * is only read when line[len] is a '-'
	 */
	if(line[len] != '-')
		return 0;

	return line[len + 1] == '-';
}
/*
 * Initialise the various lookup tables
 *
 * rfc821Table:		receives a table filled from rfc821headers
 * subtypeTable:	receives a table filled from mimeSubtypes
 *
 * Returns 0 on success. On failure returns -1; any table created by this
 * call has then been destroyed and its pointer set to NULL.
 *
 * Creation failures are checked explicitly rather than with assert(),
 * because this file defines NDEBUG unless CL_DEBUG is set, which would
 * compile the checks away.
 */
static int
initialiseTables(table_t **rfc821Table, table_t **subtypeTable)
{
	const struct tableinit *tableinit;

	*rfc821Table = tableCreate();
	if(*rfc821Table == NULL)
		return -1;

	for(tableinit = rfc821headers; tableinit->key; tableinit++)
		if(tableInsert(*rfc821Table, tableinit->key, tableinit->value) < 0) {
			tableDestroy(*rfc821Table);
			*rfc821Table = NULL;
			return -1;
		}

	*subtypeTable = tableCreate();
	if(*subtypeTable == NULL) {
		/* Don't leave a half initialised pair behind */
		tableDestroy(*rfc821Table);
		*rfc821Table = NULL;
		return -1;
	}

	for(tableinit = mimeSubtypes; tableinit->key; tableinit++)
		if(tableInsert(*subtypeTable, tableinit->key, tableinit->value) < 0) {
			tableDestroy(*rfc821Table);
			tableDestroy(*subtypeTable);
			*rfc821Table = NULL;
			*subtypeTable = NULL;
			return -1;
		}

	return 0;
}
/*
 * If there's a HTML text version use that, otherwise use the last text part
 * found. HTML text is most likely to include a scripting worm
 *
 * messages:	array of message parts, entries may be NULL
 * size:	number of entries in messages
 *
 * Returns the index of the first text/html part, or failing that the index
 * of the last part whose MIME type is TEXT.
 * If we can't find one, return -1
 */
static int
getTextPart(message *const messages[], size_t size)
{
	size_t i;
	int textpart = -1;

	for(i = 0; i < size; i++) {
		/*
		 * Skip empty slots rather than assert(): asserts are compiled
		 * out in non-debug builds
		 */
		if(messages[i] == NULL)
			continue;
		if(messageGetMimeType(messages[i]) != TEXT)
			continue;
		if(strcasecmp(messageGetMimeSubtype(messages[i]), "html") == 0)
			return (int)i;
		textpart = (int)i;
	}

	return textpart;
}
/*
 * strip -
 *	Remove the trailing spaces from a buffer. Don't call this directly,
 * always call strstrip() which is a wrapper to this routine to be used with
 * NUL terminated strings. This code looks a bit strange because of its
 * heritage from code that worked on strings that weren't necessarily NUL
 * terminated.
 * TODO: rewrite for clamAV
 *
 * buf:	the buffer to strip in place, may be NULL
 * len:	sizeof(buf), not strlen(buf). It must be int not size_t because of
 *	the >= 0 test
 *
 * Returns its new length (a la strlen). If len is larger than the string
 * plus its terminator nothing is stripped and strlen(buf) is returned.
 */
static size_t
strip(char *buf, int len)
{
	char *ptr;
	size_t i;

	if((buf == NULL) || (len <= 0))
		return 0;

	i = strlen(buf);
	if(len > (int)(i + 1))
		return i;
	ptr = &buf[--len];

	/*
	 * Walk backwards overwriting with NULs until a character that is to
	 * be kept is found. The characters are cast to unsigned char because
	 * the <ctype.h> functions are undefined for negative values
	 */
#if	defined(UNIX) || defined(C_LINUX) || defined(C_DARWIN)	/* watch - it may be in shared text area */
	do
		if(*ptr)
			*ptr = '\0';
	while((--len >= 0) && (!isgraph((unsigned char)*--ptr)) && (*ptr != '\n') && (*ptr != '\r'));
#else	/* more characters can be displayed on DOS */
	do
#ifndef	REAL_MODE_DOS
		if(*ptr)	/* C8.0 puts into a text area */
#endif
			*ptr = '\0';
	while((--len >= 0) && ((*--ptr == '\0') || (isspace((unsigned char)*ptr))));
#endif
	return((size_t)(len + 1));
}
/*
 * strstrip:
 *	Strip a given NUL terminated string in place, by handing it to strip()
 * together with the size of the buffer that it occupies (its length plus the
 * terminator).
 *
 * Returns whatever strip() returns, or 0 if s is NULL
 */
size_t
strstrip(char *s)
{
	int length;

	if(s == NULL)
		return 0;

	length = (int)strlen(s);

	return strip(s, length + 1);
}
/*
 * Apply one MIME header line to a message.
 *
 * m:		the message to update
 * cmd:		the header name, looked up in rfc821Table after RFC822
 *		comments have been removed from it
 * rfc821Table:	maps header names onto CONTENT_TYPE and friends
 * arg:		the header value, may be NULL
 *
 * Content-Type sets the MIME type and subtype and adds the parameters that
 * follow the first ';'. Content-Transfer-Encoding sets the encoding.
 * Content-Disposition sets the disposition type, adds the first parameter
 * and makes sure that the message has a "filename" argument. Any other
 * header is ignored.
 *
 * Always returns 0
 */
static int
parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg)
{
	char *copy, *p;
	const char *ptr;
	int commandNumber;

	/* arg may legitimately be NULL, don't hand that to %s */
	cli_dbgmsg("parseMimeHeader: cmd='%s', arg='%s'\n", cmd, arg ? arg : "(null)");

	/* Look up the header name with any comments removed */
	copy = rfc822comments(cmd, NULL);
	if(copy) {
		commandNumber = tableFind(rfc821Table, copy);
		free(copy);
	} else
		commandNumber = tableFind(rfc821Table, cmd);

	/*
	 * ptr is the value with comments removed. copy owns that memory, if
	 * any was allocated, and is freed on the way out
	 */
	copy = rfc822comments(arg, NULL);

	if(copy)
		ptr = copy;
	else
		ptr = arg;

	switch(commandNumber) {
		case CONTENT_TYPE:
			/*
			 * Fix for non RFC1521 compliant mailers
			 * that send content-type: Text instead
			 * of content-type: Text/Plain, or
			 * just simply "Content-Type:"
			 */
			if(arg == NULL)
				/*
				 * According to section 4 of RFC1521:
				 * "Note also that a subtype specification is
				 * MANDATORY. There are no default subtypes"
				 *
				 * We have to break this and make an assumption
				 * for the subtype because virus writers and
				 * email client writers don't get it right
				 */
				cli_warnmsg("Empty content-type received, no subtype specified, assuming text/plain; charset=us-ascii\n");
			else if(strchr(ptr, '/') == NULL)
				/*
				 * No subtype at all, which I believe is
				 * illegal according to RFC1521
				 */
				cli_dbgmsg("Invalid content-type '%s' received, no subtype specified, assuming text/plain; charset=us-ascii\n", ptr);
			else {
				int i;
				char *mimeArgs;	/* RHS of the ; */

				if(*arg == '/') {
					cli_warnmsg("Content-type '/' received, assuming application/octet-stream\n");
					messageSetMimeType(m, "application");
					messageSetMimeSubtype(m, "octet-stream");
				} else {
					/*
					 * The content type could be in quotes:
					 *	Content-Type: "multipart/mixed"
					 * FIXME: this is a hack in that ignores
					 *	the quotes, it doesn't handle
					 *	them properly
					 */
					while(isspace((unsigned char)*ptr))
						ptr++;
					if(ptr[0] == '\"')
						ptr++;

					if(ptr[0] != '/') {
						char *s;
						char *mimeType;	/* LHS of the ; */
#ifdef	CL_THREAD_SAFE
						char *strptr = NULL;
#endif

						s = mimeType = cli_strtok(ptr, 0, ";");
						/*
						 * Handle
						 * Content-Type: foo/bar multipart/mixed
						 * and
						 * Content-Type: multipart/mixed foo/bar
						 */
						if(s && *s) {
							for(;;) {
								/*
								 * Stephen White <stephen@earth.li>
								 * Some clients put space after
								 * the mime type but before
								 * the ;
								 */
#ifdef	CL_THREAD_SAFE
								int set = messageSetMimeType(m, strtok_r(s, "/", &strptr));

								s = strtok_r(NULL, ";", &strptr);
#else
								int set = messageSetMimeType(m, strtok(s, "/"));

								s = strtok(NULL, ";");
#endif
								if(s == NULL)
									break;
								if(set) {
									/*
									 * len is the stripped
									 * length. Only look for
									 * a closing quote when
									 * there is a last
									 * character to look at
									 */
									size_t len = strstrip(s);

									if(len && (s[len - 1] == '\"')) {
										s[len - 1] = '\0';
										len = strstrip(s);
									}
									if(len) {
										if(strchr(s, ' ')) {
											char *t = cli_strtok(s, 0, " ");

											messageSetMimeSubtype(m, t);
											free(t);
										} else
											messageSetMimeSubtype(m, s);
									}
								}

								/* Is there another type/subtype pair? */
								while(*s && !isspace((unsigned char)*s))
									s++;
								if(*s++ == '\0')
									break;
								if(*s == '\0')
									break;
							}
						}
						if(mimeType)
							free(mimeType);
					}
				}

				/*
				 * Add in all rest of the arguments.
				 * e.g. if the header is this:
				 * Content-Type: multipart/mixed; boundary=foo
				 * we find the boundary argument and set it
				 */
				i = 1;
				while((mimeArgs = cli_strtok(ptr, i++, ";")) != NULL) {
					cli_dbgmsg("mimeArgs = '%s'\n", mimeArgs);

					messageAddArguments(m, mimeArgs);
					free(mimeArgs);
				}
			}
			break;
		case CONTENT_TRANSFER_ENCODING:
			messageSetEncoding(m, ptr);
			break;
		case CONTENT_DISPOSITION:
			p = cli_strtok(ptr, 0, ";");
			if(p) {
				if(*p) {
					messageSetDispositionType(m, p);
					free(p);
					/* p may be NULL from here on */
					p = cli_strtok(ptr, 1, ";");
					messageAddArgument(m, p);
				}
				free(p);
			}
			if((p = (char *)messageFindArgument(m, "filename")) == NULL)
				/*
				 * Handle this type of header, without
				 * a filename (e.g. some Worm.Torvil.D)
				 *	Content-ID: <nRfkHdrKsAxRU>
				 * Content-Transfer-Encoding: base64
				 * Content-Disposition: attachment
				 */
				messageAddArgument(m, "filename=unknown");
			else
				free(p);
	}
	if(copy)
		free(copy);

	return 0;
}
2004-01-28 10:16:51 +00:00
/*
* Save the text portion of the message
*/
static void
2006-07-03 09:19:15 +00:00
saveTextPart ( message * m , const char * dir , int destroy_text )
2004-01-28 10:16:51 +00:00
{
2004-08-22 10:37:32 +00:00
fileblob * fb ;
2004-01-28 10:16:51 +00:00
messageAddArgument ( m , " filename=textportion " ) ;
2006-07-03 09:19:15 +00:00
if ( ( fb = messageToFileblob ( m , dir , destroy_text ) ) ! = NULL ) {
2004-01-28 10:16:51 +00:00
/*
* Save main part to scan that
*/
2004-09-28 18:40:12 +00:00
cli_dbgmsg ( " Saving main message \n " ) ;
2004-01-28 10:16:51 +00:00
2004-08-22 10:37:32 +00:00
fileblobDestroy ( fb ) ;
2004-01-28 10:16:51 +00:00
}
}
2004-09-17 09:11:06 +00:00
/*
 * Handle RFC822 comments in headers.
 *
 * in:	the header text, may be NULL
 * out:	where to put the text without its comments. It must not be "in" and
 *	must have room for strlen(in) + 1 bytes. If out == NULL a buffer is
 *	allocated instead, which the caller must free
 *
 * Returns the buffer holding the text without the comments.
 * Returns NULL on error or if the input has no comments, i.e. it contains
 * no '('.
 *
 * See section 3.4.3 of RFC822
 * TODO: handle comments that go on to more than one line
 */
static char *
rfc822comments(const char *in, char *out)
{
	const char *src;
	char *dst;
	int escaped = 0, quoted = 0, depth = 0;

	if((in == NULL) || (strchr(in, '(') == NULL))
		return NULL;

	assert(out != in);

	if(out == NULL) {
		out = cli_malloc(strlen(in) + 1);
		if(out == NULL)
			return NULL;
	}

	cli_dbgmsg("rfc822comments: contains a comment\n");

	dst = out;
	for(src = in; *src != '\0'; src++) {
		const char c = *src;

		if(escaped) {
			/* The character after a backslash is taken literally */
			if(depth == 0)
				*dst++ = c;
			escaped = 0;
			continue;
		}

		if(c == '\\')
			escaped = 1;
		else if(c == '\"') {
			/* Quotes are always copied, whatever the nesting */
			*dst++ = c;
			quoted = !quoted;
		} else if(c == '(') {
			/* Brackets within quotes don't delimit comments */
			if(quoted)
				*dst++ = c;
			else
				depth++;
		} else if(c == ')') {
			if(quoted)
				*dst++ = c;
			else if(depth > 0)
				depth--;
		} else if(depth == 0)
			*dst++ = c;
	}

	if(escaped)	/* last character was a single backslash */
		*dst++ = '\\';
	*dst = '\0';

	cli_dbgmsg("rfc822comments '%s'=>'%s'\n", in, out);

	return out;
}
2004-09-16 18:03:25 +00:00
/*
* Handle RFC2047 encoding . Returns a malloc ' d buffer that the caller must
* free , or NULL on error
*/
static char *
rfc2047 ( const char * in )
{
char * out , * pout ;
size_t len ;
2004-10-14 21:20:25 +00:00
if ( ( strstr ( in , " =? " ) = = NULL ) | | ( strstr ( in , " ?= " ) = = NULL ) )
2004-09-16 18:03:25 +00:00
return strdup ( in ) ;
cli_dbgmsg ( " rfc2047 '%s' \n " , in ) ;
out = cli_malloc ( strlen ( in ) + 1 ) ;
if ( out = = NULL )
return NULL ;
pout = out ;
/* For each RFC2047 string */
while ( * in ) {
2004-10-14 17:47:19 +00:00
char encoding , * ptr , * enctext ;
2004-09-16 18:03:25 +00:00
message * m ;
blob * b ;
/* Find next RFC2047 string */
while ( * in ) {
if ( ( * in = = ' = ' ) & & ( in [ 1 ] = = ' ? ' ) ) {
in + = 2 ;
break ;
}
* pout + + = * in + + ;
}
/* Skip over charset, find encoding */
while ( ( * in ! = ' ? ' ) & & * in )
in + + ;
if ( * in = = ' \0 ' )
break ;
encoding = * + + in ;
encoding = tolower ( encoding ) ;
if ( ( encoding ! = ' q ' ) & & ( encoding ! = ' b ' ) ) {
2005-05-11 15:24:33 +00:00
cli_warnmsg ( " Unsupported RFC2047 encoding type '%c' - if you believe this file contains a virus, submit it to www.clamav.net \n " , encoding ) ;
2004-10-04 10:52:39 +00:00
free ( out ) ;
out = NULL ;
2004-09-16 18:03:25 +00:00
break ;
}
/* Skip to encoded text */
if ( * + + in ! = ' ? ' )
break ;
if ( * + + in = = ' \0 ' )
break ;
2004-10-14 17:47:19 +00:00
enctext = strdup ( in ) ;
if ( enctext = = NULL ) {
free ( out ) ;
out = NULL ;
break ;
}
2004-09-16 18:03:25 +00:00
in = strstr ( in , " ?= " ) ;
2004-10-14 17:47:19 +00:00
if ( in = = NULL ) {
free ( enctext ) ;
2004-09-16 18:03:25 +00:00
break ;
2004-10-14 17:47:19 +00:00
}
2004-09-16 18:03:25 +00:00
in + = 2 ;
ptr = strstr ( enctext , " ?= " ) ;
assert ( ptr ! = NULL ) ;
* ptr = ' \0 ' ;
/*cli_dbgmsg("Need to decode '%s' with method '%c'\n", enctext, encoding);*/
m = messageCreate ( ) ;
2004-09-28 18:40:12 +00:00
if ( m = = NULL )
2004-09-16 18:03:25 +00:00
break ;
2004-11-28 22:06:40 +00:00
messageAddStr ( m , enctext ) ;
2004-10-14 17:47:19 +00:00
free ( enctext ) ;
2004-09-18 15:03:15 +00:00
switch ( encoding ) {
2004-09-16 18:03:25 +00:00
case ' q ' :
messageSetEncoding ( m , " quoted-printable " ) ;
break ;
case ' b ' :
messageSetEncoding ( m , " base64 " ) ;
break ;
}
2006-07-01 16:21:03 +00:00
b = messageToBlob ( m , 1 ) ;
2004-09-16 18:03:25 +00:00
len = blobGetDataSize ( b ) ;
cli_dbgmsg ( " Decoded as '%*.*s' \n " , len , len , blobGetData ( b ) ) ;
memcpy ( pout , blobGetData ( b ) , len ) ;
blobDestroy ( b ) ;
messageDestroy ( m ) ;
if ( pout [ len - 1 ] = = ' \n ' )
pout + = len - 1 ;
else
pout + = len ;
}
2004-11-07 16:42:55 +00:00
if ( out = = NULL )
return NULL ;
* pout = ' \0 ' ;
2004-09-16 18:03:25 +00:00
2004-11-07 16:42:55 +00:00
cli_dbgmsg ( " rfc2047 returns '%s' \n " , out ) ;
2004-09-16 18:03:25 +00:00
return out ;
}
2004-10-05 15:48:47 +00:00
# ifdef PARTIAL_DIR
/*
 * Handle partial messages (message/partial).
 *
 * The fragment held in m is stored in the spool directory
 * "<tmpdir>/clamav-partial" with its filename argument set to the
 * concatenation of the message's "id" and "number" arguments. If the message
 * also carries a "total" argument and number == total, the fragments
 * 1..total found in the spool directory are concatenated into "<dir>/<id>".
 *
 * Returns 0 on success, -1 on failure.
 */
static int
rfc1341(message *m, const char *dir)
{
	fileblob *fb;
	char *arg, *id, *number, *total, *oldfilename;
	const char *tmpdir;
	char pdir[NAME_MAX + 1];

	id = (char *)messageFindArgument(m, "id");
	if(id == NULL)
		return -1;

#ifdef	C_CYGWIN
	if((tmpdir = getenv("TEMP")) == (char *)NULL)
		if((tmpdir = getenv("TMP")) == (char *)NULL)
			if((tmpdir = getenv("TMPDIR")) == (char *)NULL)
				tmpdir = "C:\\";
#else
	if((tmpdir = getenv("TMPDIR")) == (char *)NULL)
		if((tmpdir = getenv("TMP")) == (char *)NULL)
			if((tmpdir = getenv("TEMP")) == (char *)NULL)
#ifdef	P_tmpdir
				tmpdir = P_tmpdir;
#else
				tmpdir = "/tmp";
#endif
#endif

	snprintf(pdir, sizeof(pdir) - 1, "%s/clamav-partial", tmpdir);

	if((mkdir(pdir, 0700) < 0) && (errno != EEXIST)) {
		cli_errmsg("Can't create the directory '%s'\n", pdir);
		free(id);	/* was leaked on this path */
		return -1;
	} else {
		struct stat statb;

		if(stat(pdir, &statb) < 0) {
			cli_errmsg("Can't stat the directory '%s'\n", pdir);
			free(id);	/* was leaked on this path */
			return -1;
		}
		/* Anybody other than the owner being able to get in is worth a warning */
		if(statb.st_mode & 077)
			cli_warnmsg("Insecure partial directory %s (mode 0%o)\n",
				pdir, (unsigned int)(statb.st_mode & 0777));
	}

	number = (char *)messageFindArgument(m, "number");
	if(number == NULL) {
		free(id);
		return -1;
	}

	/*
	 * Rename the fragment to "<id><number>" so that it can be found
	 * again when the last part arrives
	 */
	oldfilename = (char *)messageFindArgument(m, "filename");
	if(oldfilename == NULL)
		oldfilename = (char *)messageFindArgument(m, "name");

	/* 10 == strlen("filename=") + 1 for the terminating NUL */
	arg = cli_malloc(10 + strlen(id) + strlen(number));
	if(arg) {
		sprintf(arg, "filename=%s%s", id, number);
		messageAddArgument(m, arg);
		free(arg);
	}

	if(oldfilename) {
		cli_warnmsg("Must reset to %s\n", oldfilename);
		free(oldfilename);
	}

	if((fb = messageToFileblob(m, pdir, 0)) == NULL) {
		free(id);
		free(number);
		return -1;
	}

	fileblobDestroy(fb);

	total = (char *)messageFindArgument(m, "total");
	cli_dbgmsg("rfc1341: %s, %s of %s\n", id, number, (total) ? total : "?");
	if(total) {
		int n = atoi(number);
		int t = atoi(total);
		DIR *dd = NULL;

		free(total);
		/*
		 * If it's the last one - reassemble it
		 * FIXME: this assumes that we receive the parts in order
		 */
		if((n == t) && ((dd = opendir(pdir)) != NULL)) {
			FILE *fout;
			char outname[NAME_MAX + 1];
			time_t now;

			/*
			 * NOTE(review): id comes straight from the message
			 * headers; confirm that it can't contain '/' by the
			 * time it gets here
			 */
			snprintf(outname, sizeof(outname) - 1, "%s/%s", dir, id);

			cli_dbgmsg("outname: %s\n", outname);

			fout = fopen(outname, "wb");
			if(fout == NULL) {
				cli_errmsg("Can't open '%s' for writing\n", outname);
				free(id);
				free(number);
				closedir(dd);
				return -1;
			}

			time(&now);
			for(n = 1; n <= t; n++) {
				char filename[NAME_MAX + 1];
				const struct dirent *dent;
#if	defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
				union {
					struct dirent d;
					char b[offsetof(struct dirent, d_name) + NAME_MAX + 1];
				} result;
#endif

				/*
				 * Files are matched on this prefix only.
				 * NOTE(review): "<id>1" is also a prefix of
				 * "<id>10", "<id>11", ... so with ten or more
				 * parts the wrong fragment may be picked up
				 */
				snprintf(filename, sizeof(filename), "%s%d", id, n);

#ifdef	HAVE_READDIR_R_3
				while((readdir_r(dd, &result.d, &dent) == 0) && dent) {
#elif	defined(HAVE_READDIR_R_2)
				while((dent = (struct dirent *)readdir_r(dd, &result.d))) {
#else	/*!HAVE_READDIR_R*/
				while((dent = readdir(dd))) {
#endif
					FILE *fin;
					char buffer[BUFSIZ], fullname[NAME_MAX + 1];
					int nblanks;
					extern short cli_leavetemps_flag;
					struct stat statb;

#ifndef	C_CYGWIN
					if(dent->d_ino == 0)
						continue;
#endif

					snprintf(fullname, sizeof(fullname) - 1,
						"%s/%s", pdir, dent->d_name);

					if(strncmp(filename, dent->d_name, strlen(filename)) != 0) {
						/*
						 * Not the fragment we're after.
						 * When temporary files are being
						 * kept, use the opportunity to
						 * expire those over a week old
						 */
						if(!cli_leavetemps_flag)
							continue;
						if(stat(fullname, &statb) < 0)
							continue;
						if(now - statb.st_mtime > (time_t)(7 * 24 * 3600))
							if(unlink(fullname) >= 0)
								cli_warnmsg("removed old RFC1341 file %s\n", fullname);
						continue;
					}

					fin = fopen(fullname, "rb");
					if(fin == NULL) {
						cli_errmsg("Can't open '%s' for reading\n", fullname);
						fclose(fout);
						unlink(outname);
						free(id);
						free(number);
						closedir(dd);
						return -1;
					}
					nblanks = 0;
					while(fgets(buffer, sizeof(buffer) - 1, fin) != NULL)
						/*
						 * Ensure that trailing newlines
						 * aren't copied: blank lines are
						 * only counted, and written out
						 * once more text follows them
						 */
						if(buffer[0] == '\n')
							nblanks++;
						else {
							if(nblanks)
								do
									putc('\n', fout);
								while(--nblanks > 0);
							fputs(buffer, fout);
						}
					fclose(fin);

					/* don't unlink if leave temps */
					if(!cli_leavetemps_flag)
						unlink(fullname);
					break;
				}
				/* Start from the top of the directory for the next part */
				rewinddir(dd);
			}
			closedir(dd);
			fclose(fout);
		}
	}
	free(number);
	free(id);

	return 0;
}
# endif
2006-09-13 21:40:03 +00:00
# ifdef CL_EXPERIMENTAL
/*
 * Release what getHrefs() handed out: the blob (which may be NULL) and the
 * tag arguments collected in hrefs
 */
static void
hrefs_done(blob *b, tag_arguments_t *hrefs)
{
	if(b != NULL)
		blobDestroy(b);

	html_tag_arg_free(hrefs);
}
/*
 * Extract the tag arguments of message m into hrefs.
 *
 * This used to be part of checkURLs. It was split out because phishingScan
 * needs it too, and phishingScan might be used in situations where checkURLs
 * is disabled (see ifdef).
 *
 * Returns the blob holding the message text, or NULL if the message could not
 * be converted, is empty, is larger than 100K, or html_normalise_mem failed.
 * On the first three of those hrefs is left untouched.
 */
static blob *
getHrefs(message *m, tag_arguments_t *hrefs)
{
	size_t len;
	blob *b = messageToBlob(m, 0);

	if(b == NULL)
		return NULL;

	len = blobGetDataSize(b);

	if(len != 0) {
		/* TODO: make this size customisable */
		if(len > 100 * 1024)
			cli_warnmsg("Viruses pointed to by URLs not scanned in large message\n");
		else {
			hrefs->count = 0;
			hrefs->tag = hrefs->value = NULL;
			hrefs->contents = NULL;

			cli_dbgmsg("getHrefs: calling html_normalise_mem\n");
			if(html_normalise_mem(blobGetData(b), len, NULL, hrefs)) {
				cli_dbgmsg("getHrefs: html_normalise_mem returned\n");

				/* TODO: Do we need to call remove_html_comments? */
				return b;
			}
		}
	}

	/* Every failure after the blob was created ends up here */
	blobDestroy(b);
	return NULL;
}
/*
 * Inspect the links in mainMessage: run phishingScan over them unless
 * CL_SCAN_NOPHISHING is set and, for HTML messages scanned with
 * CL_SCAN_MAILURL, hand them to do_checkURLs provided nothing was found.
 *
 * A phishing hit marks the message as infected and sets *rc to 3.
 */
static void
checkURLs(message *mainMessage, mbox_ctx *mctx, int *rc, int is_html)
{
	blob *b;
	tag_arguments_t hrefs;

	/* aCaB: stripped GA related stuff */
	hrefs.scanContents = (!(mctx->ctx->options & CL_SCAN_NOPHISHING));

#if	(!defined(FOLLOWURLS)) || (FOLLOWURLS <= 0)
	/*
	 * Don't waste time extracting hrefs (parsing html), nobody
	 * will need it
	 */
	if(!hrefs.scanContents)
		return;
#endif

	hrefs.contents = NULL;
	hrefs.tag = hrefs.value = NULL;
	hrefs.count = 0;

	b = getHrefs(mainMessage, &hrefs);
	if(b != NULL) {
		if((mctx->ctx->options & CL_SCAN_NOPHISHING) == 0)
			if(phishingScan(mainMessage, mctx->dir, mctx->ctx, &hrefs) == CL_VIRUS) {
				mainMessage->isInfected = TRUE;
				*rc = 3;
				cli_dbgmsg("PH:Phishing found\n");
			}

		if(is_html && (mctx->ctx->options & CL_SCAN_MAILURL) && (*rc != 3))
			do_checkURLs(mainMessage, mctx->dir, &hrefs);
	}

	/* b may be NULL here, hrefs_done copes with that */
	hrefs_done(b, &hrefs);
}
# if defined(FOLLOWURLS) && (FOLLOWURLS > 0)
/*
 * Download the http:// URLs listed in hrefs into dir so that they get
 * scanned. At most FOLLOWURLS different URLs are fetched; a URL which
 * appears more than once is fetched only once.
 *
 * With WITH_CURL and CL_THREAD_SAFE each download runs in its own thread and
 * all of them are joined before returning. With WITH_CURL alone getURL is
 * called directly. Without WITH_CURL the external "GET" command is run via
 * system().
 *
 * m is not used.
 */
static void
do_checkURLs(message *m, const char *dir, tag_arguments_t *hrefs)
{
	table_t *t;
	int i, n;
#if	defined(WITH_CURL) && defined(CL_THREAD_SAFE)
	pthread_t tid[FOLLOWURLS];
	struct arg args[FOLLOWURLS];
#endif

	/* The table remembers which URLs have already been handled */
	t = tableCreate();
	if(t == NULL)
		return;

	n = 0;

	for(i = 0; i < hrefs->count; i++) {
		const char *url = (const char *)hrefs->value[i];

		/*
		 * TODO: If it's an image source, it'd be nice to note beacons
		 *	where width="0" height="0", which needs support from
		 *	the HTML normalise code
		 */
		if(strncasecmp("http://", url, 7) == 0) {
			char *ptr;
#ifdef	WITH_CURL
#ifndef	CL_THREAD_SAFE
			struct arg arg;
#endif
#else	/*!WITH_CURL*/
			int cmdlen;
			const char *p;
#ifdef	CL_THREAD_SAFE
			static pthread_mutex_t system_mutex = PTHREAD_MUTEX_INITIALIZER;
#endif
			struct stat statb;
			char cmd[512];
#endif	/*WITH_CURL*/
			char name[NAME_MAX + 1];

			if(tableFind(t, url) == 1) {
				cli_dbgmsg("URL %s already downloaded\n", url);
				continue;
			}
			/*
			 * What about foreign character spoofing?
			 * It would be useful be able to check if url
			 *	is the same as the text displayed, e.g.
			 * <a href="http://dodgy.biz">www.paypal.com</a>
			 * but that needs support from HTML normalise
			 */
			if(strchr(url, '%') && strchr(url, '@'))
				cli_warnmsg("Possible URL spoofing attempt noticed, but not yet handled (%s)\n", url);

			if(n == FOLLOWURLS) {
				cli_warnmsg("URL %s will not be scanned\n", url);
				break;
			}

			(void)tableInsert(t, url, 1);
			cli_dbgmsg("Downloading URL %s to be scanned\n", url);

			/* The file is named after the URL, with '/' replaced by '_' */
			strncpy(name, url, sizeof(name) - 1);
			name[sizeof(name) - 1] = '\0';
			for(ptr = name; *ptr; ptr++)
				if(*ptr == '/')
					*ptr = '_';

#ifdef	WITH_CURL
#ifdef	CL_THREAD_SAFE
			args[n].curl = curl_easy_init();
			if(args[n].curl == NULL) {
				cli_errmsg("curl_easy_init failed\n");
				continue;
			}
			args[n].dir = dir;
			args[n].url = strdup(url);
			args[n].filename = strdup(name);
			/*
			 * Only count the slot as used once the thread is
			 * really running, otherwise the join loop below would
			 * wait on an uninitialised thread id
			 */
			if((args[n].url == NULL) || (args[n].filename == NULL) ||
			   (pthread_create(&tid[n], NULL, getURL, &args[n]) != 0)) {
				cli_errmsg("do_checkURLs: can't start the download of %s\n", url);
				free(args[n].filename);
				free(args[n].url);
				curl_easy_cleanup(args[n].curl);
				continue;
			}
#else
			/* easy isn't the word I'd use... */
			arg.curl = curl_easy_init();
			if(arg.curl == NULL) {
				cli_errmsg("curl_easy_init failed\n");
				continue;
			}
			arg.url = strdup(url);
			if(arg.url == NULL) {
				cli_errmsg("do_checkURLs: can't start the download of %s\n", url);
				curl_easy_cleanup(arg.curl);
				continue;
			}
			arg.dir = dir;
			arg.filename = name;
			getURL(&arg);
			curl_easy_cleanup(arg.curl);
			free(arg.url);
#endif

#else	/*!WITH_CURL*/
			cli_warnmsg("The use of mail-follow-urls without CURL being installed is deprecated\n");
			/*
			 * The URL comes from the message being scanned and is
			 * about to be handed to the shell. Both the URL and
			 * the file name derived from it are put in double
			 * quotes, so refuse anything which could terminate
			 * the quotes or be expanded inside them
			 */
			for(p = url; *p; p++)
				if(iscntrl((unsigned char)*p) || (strchr("\"$`\\", *p) != NULL))
					break;
			if(*p) {
				cli_warnmsg("URL %s contains unsafe characters and will not be scanned\n", url);
				continue;
			}
			/*
			 * TODO: maximum size and timeouts
			 */
#ifdef	CL_DEBUG
			cmdlen = snprintf(cmd, sizeof(cmd), "GET -t10 \"%s\" >\"%s/%s\"", url, dir, name);
#else
			cmdlen = snprintf(cmd, sizeof(cmd), "GET -t10 \"%s\" >\"%s/%s\" 2>/dev/null", url, dir, name);
#endif
			/* Running a truncated command would be worse than skipping the URL */
			if((cmdlen < 0) || ((size_t)cmdlen >= sizeof(cmd))) {
				cli_warnmsg("URL %s is too long and will not be scanned\n", url);
				continue;
			}
			cli_dbgmsg("%s\n", cmd);
#ifdef	CL_THREAD_SAFE
			pthread_mutex_lock(&system_mutex);
#endif
			system(cmd);
#ifdef	CL_THREAD_SAFE
			pthread_mutex_unlock(&system_mutex);
#endif
			snprintf(cmd, sizeof(cmd), "%s/%s", dir, name);
			if(stat(cmd, &statb) >= 0)
				if(statb.st_size == 0) {
					cli_warnmsg("URL %s failed to download\n", url);
					/*
					 * Don't bother scanning an empty file
					 */
					(void)unlink(cmd);
				}
#endif
			++n;
		}
	}
	tableDestroy(t);

#if	defined(WITH_CURL) && defined(CL_THREAD_SAFE)
	assert(n <= FOLLOWURLS);
	cli_dbgmsg("checkURLs: waiting for %d thread(s) to finish\n", n);
	while(--n >= 0) {
		pthread_join(tid[n], NULL);
		free(args[n].filename);
		free(args[n].url);
		curl_easy_cleanup(args[n].curl);
	}
#endif
}
# else
/*
 * URL following is compiled out (FOLLOWURLS is not defined as a positive
 * number), so there is nothing to download
 */
static void
do_checkURLs(message *m, const char *dir, tag_arguments_t *hrefs)
{
	(void)m;
	(void)dir;
	(void)hrefs;
}
# endif
# else /*!CL_EXPERIMENTAL*/
2005-03-06 21:13:16 +00:00
# if defined(FOLLOWURLS) && (FOLLOWURLS > 0)
2004-08-08 21:32:39 +00:00
static void
2006-09-20 10:24:17 +00:00
checkURLs ( message * m , mbox_ctx * mctx , int * rc , int is_html )
2004-08-08 21:32:39 +00:00
{
2006-07-01 16:21:03 +00:00
blob * b = messageToBlob ( m , 0 ) ;
2004-08-08 21:32:39 +00:00
size_t len ;
2004-08-12 10:37:53 +00:00
table_t * t ;
2004-09-14 20:51:01 +00:00
int i , n ;
2004-08-18 21:39:36 +00:00
# if defined(WITH_CURL) && defined(CL_THREAD_SAFE)
2005-03-06 21:13:16 +00:00
pthread_t tid [ FOLLOWURLS ] ;
struct arg args [ FOLLOWURLS ] ;
2004-08-18 21:39:36 +00:00
# endif
2004-09-14 20:51:01 +00:00
tag_arguments_t hrefs ;
2004-08-08 21:32:39 +00:00
if ( b = = NULL )
return ;
len = blobGetDataSize ( b ) ;
2004-10-01 07:58:27 +00:00
if ( len = = 0 ) {
blobDestroy ( b ) ;
2004-08-18 15:56:38 +00:00
return ;
2004-10-01 07:58:27 +00:00
}
2004-08-18 15:56:38 +00:00
2004-08-12 10:37:53 +00:00
/* TODO: make this size customisable */
if ( len > 100 * 1024 ) {
cli_warnmsg ( " Viruses pointed to by URL not scanned in large message \n " ) ;
blobDestroy ( b ) ;
2004-10-01 07:58:27 +00:00
return ;
2004-08-12 10:37:53 +00:00
}
t = tableCreate ( ) ;
2004-09-15 22:11:50 +00:00
if ( t = = NULL ) {
blobDestroy ( b ) ;
return ;
}
2004-08-18 15:56:38 +00:00
2004-09-15 08:48:16 +00:00
hrefs . count = 0 ;
hrefs . tag = hrefs . value = NULL ;
2004-09-14 20:51:01 +00:00
cli_dbgmsg ( " checkURLs: calling html_normalise_mem \n " ) ;
2004-09-15 22:11:50 +00:00
if ( ! html_normalise_mem ( blobGetData ( b ) , len , NULL , & hrefs ) ) {
2004-09-06 11:05:44 +00:00
blobDestroy ( b ) ;
tableDestroy ( t ) ;
return ;
2004-09-15 22:11:50 +00:00
}
cli_dbgmsg ( " checkURLs: html_normalise_mem returned \n " ) ;
2004-09-13 13:18:52 +00:00
/* TODO: Do we need to call remove_html_comments? */
2004-08-12 10:37:53 +00:00
2004-09-14 20:51:01 +00:00
n = 0 ;
for ( i = 0 ; i < hrefs . count ; i + + ) {
2004-11-04 10:15:49 +00:00
const char * url = ( const char * ) hrefs . value [ i ] ;
2004-09-14 20:51:01 +00:00
2005-09-28 10:12:56 +00:00
/*
* TODO : If it ' s an image source , it ' d be nice to note beacons
* where width = " 0 " height = " 0 " , which needs support from
* the HTML normalise code
*/
2004-09-14 20:51:01 +00:00
if ( strncasecmp ( " http:// " , url , 7 ) = = 0 ) {
char * ptr ;
2004-08-18 21:39:36 +00:00
# ifdef WITH_CURL
# ifndef CL_THREAD_SAFE
struct arg arg ;
# endif
# else /*!WITH_CURL*/
2004-08-09 08:28:10 +00:00
# ifdef CL_THREAD_SAFE
static pthread_mutex_t system_mutex = PTHREAD_MUTEX_INITIALIZER ;
2004-08-12 10:37:53 +00:00
# endif
struct stat statb ;
char cmd [ 512 ] ;
2004-08-18 21:39:36 +00:00
# endif /*WITH_CURL*/
2004-09-30 21:50:39 +00:00
char name [ NAME_MAX + 1 ] ;
2004-09-14 20:51:01 +00:00
if ( tableFind ( t , url ) = = 1 ) {
cli_dbgmsg ( " URL %s already downloaded \n " , url ) ;
2004-08-11 14:48:13 +00:00
continue ;
}
2005-09-28 10:12:56 +00:00
/*
* What about foreign character spoofing ?
* It would be useful be able to check if url
* is the same as the text displayed , e . g .
* < a href = " http://dodgy.biz " > www . paypal . com < / a >
* but that needs support from HTML normalise
*/
if ( strchr ( url , ' % ' ) & & strchr ( url , ' @ ' ) )
2005-09-30 14:58:37 +00:00
cli_warnmsg ( " Possible URL spoofing attempt noticed, but not yet handled (%s) \n " , url ) ;
2005-09-28 10:12:56 +00:00
2005-12-09 17:19:10 +00:00
if ( n = = FOLLOWURLS ) {
cli_warnmsg ( " URL %s will not be scanned \n " , url ) ;
break ;
}
2004-09-14 20:51:01 +00:00
( void ) tableInsert ( t , url , 1 ) ;
cli_dbgmsg ( " Downloading URL %s to be scanned \n " , url ) ;
2004-11-09 12:25:44 +00:00
strncpy ( name , url , sizeof ( name ) - 1 ) ;
2004-11-09 13:33:38 +00:00
name [ sizeof ( name ) - 1 ] = ' \0 ' ;
2004-09-14 20:51:01 +00:00
for ( ptr = name ; * ptr ; ptr + + )
if ( * ptr = = ' / ' )
* ptr = ' _ ' ;
2004-08-08 21:32:39 +00:00
2004-08-18 07:46:59 +00:00
# ifdef WITH_CURL
2004-08-18 21:39:36 +00:00
# ifdef CL_THREAD_SAFE
2006-09-21 14:42:06 +00:00
args [ n ] . curl = curl_easy_init ( ) ;
if ( args [ n ] . curl = = NULL ) {
cli_errmsg ( " curl_easy_init failed \n " ) ;
continue ;
}
2006-09-13 17:43:57 +00:00
args [ n ] . dir = mctx - > dir ;
2004-09-30 21:50:39 +00:00
args [ n ] . url = url ;
2004-08-18 21:39:36 +00:00
args [ n ] . filename = strdup ( name ) ;
pthread_create ( & tid [ n ] , NULL , getURL , & args [ n ] ) ;
# else
2006-09-21 14:42:06 +00:00
/* easy isn't the word I'd use... */
arg . curl = curl_easy_init ( ) ;
if ( arg . curl = = NULL ) {
cli_errmsg ( " curl_easy_init failed \n " ) ;
continue ;
}
2004-09-14 20:51:01 +00:00
arg . url = url ;
2006-09-13 17:43:57 +00:00
arg . dir = mctx - > dir ;
2004-08-18 21:39:36 +00:00
arg . filename = name ;
getURL ( & arg ) ;
2006-09-21 14:42:06 +00:00
curl_easy_cleanup ( arg . curl ) ;
2004-08-18 21:39:36 +00:00
# endif
2006-04-19 11:33:49 +00:00
# else /*!WITH_CURL*/
cli_warnmsg ( " The use of mail-follow-urls without CURL being installed is deprecated \n " ) ;
2004-08-10 08:17:19 +00:00
/*
* TODO : maximum size and timeouts
*/
2006-09-13 17:43:57 +00:00
len = sizeof ( cmd ) - 26 - strlen ( mctx - > dir ) - strlen ( name ) ;
2004-11-09 12:25:44 +00:00
# ifdef CL_DEBUG
2006-09-13 17:43:57 +00:00
snprintf ( cmd , sizeof ( cmd ) - 1 , " GET -t10 \" %.*s \" >%s/%s " , len , url , mctx - > dir , name ) ;
2004-11-09 12:25:44 +00:00
# else
2006-09-13 17:43:57 +00:00
snprintf ( cmd , sizeof ( cmd ) - 1 , " GET -t10 \" %.*s \" >%s/%s 2>/dev/null " , len , url , mctx - > dir , name ) ;
2004-11-09 12:25:44 +00:00
# endif
cmd [ sizeof ( cmd ) - 1 ] = ' \0 ' ;
2004-08-08 21:32:39 +00:00
cli_dbgmsg ( " %s \n " , cmd ) ;
2004-08-09 08:28:10 +00:00
# ifdef CL_THREAD_SAFE
pthread_mutex_lock ( & system_mutex ) ;
# endif
2004-08-08 21:32:39 +00:00
system ( cmd ) ;
2004-08-09 08:28:10 +00:00
# ifdef CL_THREAD_SAFE
pthread_mutex_unlock ( & system_mutex ) ;
# endif
2006-09-13 17:43:57 +00:00
snprintf ( cmd , sizeof ( cmd ) , " %s/%s " , mctx - > dir , name ) ;
2004-08-09 08:28:10 +00:00
if ( stat ( cmd , & statb ) > = 0 )
if ( statb . st_size = = 0 ) {
2004-09-14 20:51:01 +00:00
cli_warnmsg ( " URL %s failed to download \n " , url ) ;
2004-08-09 08:28:10 +00:00
/*
* Don ' t bother scanning an empty file
*/
( void ) unlink ( cmd ) ;
}
2004-08-10 08:17:19 +00:00
# endif
2004-08-18 21:39:36 +00:00
+ + n ;
2004-08-08 21:32:39 +00:00
}
}
blobDestroy ( b ) ;
2004-08-11 14:48:13 +00:00
tableDestroy ( t ) ;
2004-08-18 21:39:36 +00:00
# if defined(WITH_CURL) && defined(CL_THREAD_SAFE)
2005-03-06 21:13:16 +00:00
assert ( n < = FOLLOWURLS ) ;
2004-08-18 21:39:36 +00:00
cli_dbgmsg ( " checkURLs: waiting for %d thread(s) to finish \n " , n ) ;
while ( - - n > = 0 ) {
pthread_join ( tid [ n ] , NULL ) ;
free ( args [ n ] . filename ) ;
2006-09-21 14:42:06 +00:00
curl_easy_cleanup ( args [ n ] . curl ) ;
2004-08-18 21:39:36 +00:00
}
# endif
2004-09-30 21:50:39 +00:00
html_tag_arg_free ( & hrefs ) ;
2004-08-08 21:32:39 +00:00
}
2006-09-13 21:40:03 +00:00
# else
/*
 * URL following is compiled out (FOLLOWURLS is not defined as a positive
 * number), so there is nothing to check
 */
static void
checkURLs(message *m, mbox_ctx *mctx, int *rc, int is_html)
{
	(void)m;
	(void)mctx;
	(void)rc;
	(void)is_html;
}
# endif
2006-09-16 10:38:17 +00:00
# endif /* CL_EXPERIMENTAL */
2006-09-13 21:40:03 +00:00
2006-09-21 14:42:06 +00:00
# if defined(FOLLOWURLS) && (FOLLOWURLS > 0)
2005-12-28 13:51:36 +00:00
/*
* Includes some Win32 patches by Gianluigi Tiesi < sherpya @ netfarm . it >
2006-02-01 09:38:20 +00:00
*
* FIXME : Often WMF exploits work by sending people an email directing them
* to a page which displays a picture containing the exploit . This is not
* currently found , since only the HTML on the referred page is downloaded .
* It would be useful to scan the HTML for references to pictures and
* download them for scanning . But that will hit performance so there is
* an issue here .
2005-12-28 13:51:36 +00:00
*/
2004-08-18 07:46:59 +00:00
# ifdef WITH_CURL
2006-09-21 09:37:47 +00:00
2006-09-21 16:40:51 +00:00
# ifdef CL_EXPERIMENTAL
/*
 * Removing the reliance on libcurl
 * Includes some of the freshclam hacks by Everton da Silva Marques
 * <everton.marques@gmail.com>
 */
# include <netdb.h>
# include <sys/socket.h>
# include <netinet/in.h>
# include <net/if.h>
# include <arpa/inet.h>
# include <ctype.h>
# include <errno.h>
# include <fcntl.h>
# include <sys/time.h>
# include <stdlib.h>
# ifndef timercmp
# define timercmp(a, b, cmp) \
( ( ( a ) - > tv_sec = = ( b ) - > tv_sec ) ? \
( ( a ) - > tv_usec cmp ( b ) - > tv_usec ) : \
( ( a ) - > tv_sec cmp ( b ) - > tv_sec ) )
# endif /* timercmp */
# ifndef timersub
# define timersub(a, b, result) \
do { \
( result ) - > tv_sec = ( a ) - > tv_sec - ( b ) - > tv_sec ; \
( result ) - > tv_usec = ( a ) - > tv_usec - ( b ) - > tv_usec ; \
if ( ( result ) - > tv_usec < 0 ) { \
- - ( result ) - > tv_sec ; \
( result ) - > tv_usec + = 1000000 ; \
} \
} while ( 0 )
# endif /* timersub */
2006-09-22 08:17:25 +00:00
static long nonblock_fcntl ( int sock ) ;
static void restore_fcntl ( int sock , long fcntl_flags ) ;
static int nonblock_connect ( int sock , const struct sockaddr * addr , socklen_t addrlen , int secs ) ;
static int connect_error ( int sock ) ;
2006-09-22 12:59:17 +00:00
static int my_r_gethostbyname ( const char * hostname , struct hostent * hp , char * buf , size_t len ) ;
2006-09-21 16:40:51 +00:00
2006-09-26 16:43:17 +00:00
# define NONBLOCK_SELECT_MAX_FAILURES 3
# define NONBLOCK_MAX_BOGUS_LOOPS 10
2006-09-22 08:17:25 +00:00
2006-09-21 16:40:51 +00:00
static void *
# ifdef CL_THREAD_SAFE
getURL ( void * a )
# else
getURL ( struct arg * arg )
# endif
{
FILE * fp ;
# ifdef CL_THREAD_SAFE
struct arg * arg = ( struct arg * ) a ;
# endif
const char * url = arg - > url ;
const char * dir = arg - > dir ;
const char * filename = arg - > filename ;
char fout [ NAME_MAX + 1 ] ;
int sd , n ;
struct sockaddr_in server ;
in_addr_t ip ;
char buf [ BUFSIZ ] ;
char site [ BUFSIZ ] ;
2006-09-22 08:17:25 +00:00
in_port_t port ;
static in_port_t default_port ;
static int tcp ;
2006-09-22 18:37:22 +00:00
int doingsite , firstpacket ;
2006-09-21 16:40:51 +00:00
char * ptr ;
2006-09-26 16:43:17 +00:00
int flags , via_proxy ;
2006-09-21 16:40:51 +00:00
const char * proxy ;
2006-09-22 12:59:17 +00:00
if ( strlen ( url ) > ( sizeof ( site ) - 1 ) ) {
cli_dbgmsg ( " Ignoring long URL \" %s \" \n " , url ) ;
return NULL ;
}
2006-09-21 16:40:51 +00:00
snprintf ( fout , sizeof ( fout ) - 1 , " %s/%s " , dir , filename ) ;
fp = fopen ( fout , " wb " ) ;
if ( fp = = NULL ) {
cli_errmsg ( " Can't open '%s' for writing " , fout ) ;
return NULL ;
}
2006-09-22 12:59:17 +00:00
cli_dbgmsg ( " Saving %s to %s \n " , url , fout ) ;
2006-09-22 08:17:25 +00:00
if ( tcp = = 0 ) {
const struct protoent * proto = getprotobyname ( " tcp " ) ;
if ( proto = = NULL ) {
cli_warnmsg ( " Unknown prototol tcp, check /etc/protocols \n " ) ;
fclose ( fp ) ;
return NULL ;
}
tcp = proto - > p_proto ;
2006-09-22 18:37:22 +00:00
endprotoent ( ) ;
2006-09-22 08:17:25 +00:00
}
if ( default_port = = 0 ) {
const struct servent * servent = getservbyname ( " http " , " tcp " ) ;
if ( servent )
default_port = ( in_port_t ) ntohs ( servent - > s_port ) ;
else
default_port = 80 ;
endservent ( ) ;
}
port = default_port ;
2006-09-22 12:59:17 +00:00
doingsite = 1 ;
ptr = site ;
2006-09-21 16:40:51 +00:00
proxy = getenv ( " http_proxy " ) ; /* FIXME: handle no_proxy */
2006-09-26 16:43:17 +00:00
via_proxy = ( proxy & & * proxy ) ;
if ( via_proxy ) {
2006-09-21 16:40:51 +00:00
if ( strncasecmp ( proxy , " http:// " , 7 ) ! = 0 ) {
cli_warnmsg ( " Unsupported proxy protocol \n " ) ;
fclose ( fp ) ;
return NULL ;
}
2006-09-22 12:59:17 +00:00
cli_dbgmsg ( " Getting %s via %s \n " , url , proxy ) ;
2006-09-21 16:40:51 +00:00
proxy + = 7 ;
while ( * proxy ) {
if ( doingsite & & ( * proxy = = ' : ' ) ) {
port = 0 ;
while ( isdigit ( * + + proxy ) ) {
port * = 10 ;
port + = * proxy - ' 0 ' ;
}
continue ;
}
if ( doingsite & & ( * proxy = = ' / ' ) ) {
proxy + + ;
break ;
}
* ptr + + = * proxy + + ;
}
} else {
cli_dbgmsg ( " Getting %s \n " , url ) ;
if ( strncasecmp ( url , " http:// " , 7 ) ! = 0 ) {
cli_warnmsg ( " Unsupported protocol \n " ) ;
fclose ( fp ) ;
return NULL ;
}
url + = 7 ;
while ( * url ) {
if ( doingsite & & ( * url = = ' : ' ) ) {
port = 0 ;
while ( isdigit ( * + + url ) ) {
port * = 10 ;
port + = * url - ' 0 ' ;
}
continue ;
}
if ( doingsite & & ( * url = = ' / ' ) ) {
url + + ;
break ;
}
* ptr + + = * url + + ;
}
2006-09-22 12:59:17 +00:00
}
* ptr = ' \0 ' ;
2006-09-21 16:40:51 +00:00
2006-09-22 12:59:17 +00:00
memset ( ( char * ) & server , ' \0 ' , sizeof ( struct sockaddr_in ) ) ;
server . sin_family = AF_INET ;
server . sin_port = ( in_port_t ) htons ( port ) ;
2006-09-21 16:40:51 +00:00
2006-09-22 12:59:17 +00:00
ip = inet_addr ( site ) ;
2006-09-21 16:40:51 +00:00
# ifdef INADDR_NONE
2006-09-22 12:59:17 +00:00
if ( ip = = INADDR_NONE ) {
2006-09-21 16:40:51 +00:00
# else
2006-09-22 12:59:17 +00:00
if ( ip = = ( in_addr_t ) - 1 ) {
2006-09-21 16:40:51 +00:00
# endif
2006-09-22 12:59:17 +00:00
struct hostent h ;
2006-09-22 18:37:22 +00:00
if ( ( my_r_gethostbyname ( site , & h , buf , sizeof ( buf ) ) ! = 0 ) | |
( h . h_addr_list = = NULL ) | |
( h . h_addr = = NULL ) ) {
2006-09-22 12:59:17 +00:00
cli_dbgmsg ( " Unknown host %s \n " , site ) ;
2006-09-21 16:40:51 +00:00
fclose ( fp ) ;
return NULL ;
}
2006-09-22 12:59:17 +00:00
memcpy ( ( char * ) & ip , h . h_addr , sizeof ( ip ) ) ;
2006-09-21 16:40:51 +00:00
}
2006-09-22 12:59:17 +00:00
server . sin_addr . s_addr = ip ;
if ( ( sd = socket ( AF_INET , SOCK_STREAM , tcp ) ) < 0 ) {
fclose ( fp ) ;
return NULL ;
}
flags = nonblock_fcntl ( sd ) ;
if ( nonblock_connect ( sd , ( struct sockaddr * ) & server , sizeof ( struct sockaddr_in ) , 5 ) < 0 ) {
close ( sd ) ;
fclose ( fp ) ;
return NULL ;
}
restore_fcntl ( sd , flags ) ;
/*
* TODO : consider HTTP / 1.1
*/
2006-09-26 16:43:17 +00:00
if ( via_proxy )
2006-09-22 12:59:17 +00:00
snprintf ( buf , sizeof ( buf ) - 1 ,
" GET %s HTTP/1.0 \n User-Agent: www.clamav.net \n \n " , url ) ;
else
snprintf ( buf , sizeof ( buf ) - 1 ,
" GET /%s HTTP/1.0 \n User-Agent: www.clamav.net \n \n " , url ) ;
2006-09-21 16:40:51 +00:00
2006-09-26 16:43:17 +00:00
cli_dbgmsg ( " %s \n " , buf ) ;
2006-09-21 16:40:51 +00:00
if ( send ( sd , buf , strlen ( buf ) , 0 ) < 0 ) {
close ( sd ) ;
fclose ( fp ) ;
return NULL ;
}
shutdown ( sd , SHUT_WR ) ;
2006-09-22 18:37:22 +00:00
firstpacket = 1 ;
2006-09-21 16:40:51 +00:00
for ( ; ; ) {
fd_set set ;
struct timeval tv ;
FD_ZERO ( & set ) ;
FD_SET ( sd , & set ) ;
tv . tv_sec = 30 ; /* FIXME: make this customisable */
tv . tv_usec = 0 ;
if ( select ( sd + 1 , & set , NULL , NULL , & tv ) < 0 ) {
if ( errno = = EINTR )
continue ;
close ( sd ) ;
fclose ( fp ) ;
return NULL ;
}
if ( ! FD_ISSET ( sd , & set ) ) {
fclose ( fp ) ;
close ( sd ) ;
return NULL ;
}
n = recv ( sd , buf , BUFSIZ , 0 ) ;
2006-09-22 18:37:22 +00:00
2006-09-21 16:40:51 +00:00
if ( n < 0 ) {
fclose ( fp ) ;
close ( sd ) ;
return NULL ;
}
if ( n = = 0 )
break ;
2006-09-26 19:41:43 +00:00
/*
* FIXME : Handle header in more than one packet
*/
2006-09-22 18:37:22 +00:00
if ( firstpacket ) {
char * statusptr ;
buf [ n ] = ' \0 ' ;
statusptr = cli_strtok ( buf , 1 , " " ) ;
if ( statusptr ) {
int status = atoi ( statusptr ) ;
cli_dbgmsg ( " HTTP status %d \n " , status ) ;
free ( statusptr ) ;
2006-09-26 16:43:17 +00:00
if ( ( status = = 301 ) | | ( status = = 302 ) ) {
char * location ;
location = strstr ( buf , " \n Location: " ) ;
if ( location ) {
char * end ;
fclose ( fp ) ;
close ( sd ) ;
unlink ( fout ) ;
location + = 11 ;
free ( arg - > url ) ;
end = location ;
while ( * end & & ( * end ! = ' \n ' ) )
end + + ;
* end = ' \0 ' ;
arg - > url = strdup ( location ) ;
cli_dbgmsg ( " Redirecting to %s \n " , arg - > url ) ;
return getURL ( arg ) ;
}
}
2006-09-22 18:37:22 +00:00
}
2006-09-26 19:41:43 +00:00
/*
* Don ' t write the HTTP header
*/
ptr = strstr ( buf , " \n \n " ) ;
if ( ptr ! = NULL ) {
ptr + = 2 ;
n - = ( int ) ( ptr - buf ) ;
} else
ptr = buf ;
2006-09-22 18:37:22 +00:00
firstpacket = 0 ;
2006-09-26 19:41:43 +00:00
} else
ptr = buf ;
2006-09-22 18:37:22 +00:00
2006-09-26 19:41:43 +00:00
if ( fwrite ( ptr , n , 1 , fp ) ! = 1 ) {
2006-09-21 16:40:51 +00:00
cli_warnmsg ( " Error writing %d bytes to %s \n " ,
n , fout ) ;
break ;
}
}
fclose ( fp ) ;
close ( sd ) ;
return NULL ;
}
2006-09-22 12:59:17 +00:00
/*
 * Have a copy here because r_gethostbyname is in shared not libclamav :-(
 *
 * Look up hostname and store the answer in *hp, using the reentrant
 * gethostbyname_r() where one is available; buf (of len bytes) is the
 * scratch space those variants need. Without gethostbyname_r() the call to
 * gethostbyname() is serialised with a mutex and the result copied to *hp.
 *
 * Returns 0 on success, -1 if hostname or hp is NULL, otherwise the resolver's
 * error code.
 */
static int
my_r_gethostbyname(const char *hostname, struct hostent *hp, char *buf, size_t len)
{
#if	defined(HAVE_GETHOSTBYNAME_R_6)
	/* e.g. Linux */
	struct hostent *hp2 = NULL;
	int ret = -1;

	if((hostname == NULL) || (hp == NULL))
		return -1;
	/*
	 * This variant returns 0 or a (positive) error number, and can
	 * return 0 with hp2 set to NULL when the host isn't found
	 */
	if((gethostbyname_r(hostname, hp, buf, len, &hp2, &ret) != 0) ||
	   (hp2 == NULL))
		return (ret != 0) ? ret : -1;
#elif	defined(HAVE_GETHOSTBYNAME_R_5)
	/* e.g. BSD, Solaris, Cygwin */
	int ret = -1;

	if((hostname == NULL) || (hp == NULL))
		return -1;
	if(gethostbyname_r(hostname, hp, buf, len, &ret) == NULL)
		return ret;
#elif	defined(HAVE_GETHOSTBYNAME_R_3)
	/* e.g. HP/UX, AIX */
	if((hostname == NULL) || (hp == NULL))
		return -1;
	/* The second argument is the hostent to fill in, not its address */
	if(gethostbyname_r(hostname, hp, (struct hostent_data *)buf) < 0)
		return h_errno;
#else
	/* Single thread the code */
	struct hostent *hp2;
#ifdef	CL_THREAD_SAFE
	static pthread_mutex_t hostent_mutex = PTHREAD_MUTEX_INITIALIZER;
#endif

	if((hostname == NULL) || (hp == NULL))
		return -1;
#ifdef	CL_THREAD_SAFE
	pthread_mutex_lock(&hostent_mutex);
#endif
	if((hp2 = gethostbyname(hostname)) == NULL) {
		/* Pick up the error while the lock is still held */
		const int err = h_errno;

#ifdef	CL_THREAD_SAFE
		pthread_mutex_unlock(&hostent_mutex);
#endif
		return err;
	}
	/*
	 * NOTE(review): this copies the structure only, the pointers in it
	 * still refer to whatever gethostbyname() returned
	 */
	memcpy(hp, hp2, sizeof(struct hostent));
#ifdef	CL_THREAD_SAFE
	pthread_mutex_unlock(&hostent_mutex);
#endif
#endif

	return 0;
}
2006-09-21 16:40:51 +00:00
/*
 * Put sock into non-blocking mode.
 *
 * Returns the file status flags as they were before the change, for handing
 * to restore_fcntl() later; the value is negative if they couldn't be read,
 * in which case the socket is left alone. Failures are logged, not fatal.
 */
static long
nonblock_fcntl(int sock)
{
	const long saved = fcntl(sock, F_GETFL, 0);

	if(saved < 0) {
		cli_warnmsg("nonblock_fcntl: saving: fcntl(%d, F_GETFL): errno=%d: %s\n",
			sock, errno, strerror(errno));
		return saved;
	}

	if(fcntl(sock, F_SETFL, saved | O_NONBLOCK) != 0)
		cli_warnmsg("nonblock_fcntl: fcntl(%d, F_SETFL, O_NONBLOCK): errno=%d: %s\n",
			sock, errno, strerror(errno));

	return saved;
}
/*
 * Set the file status flags of sock back to fcntl_flags, the value returned
 * earlier by nonblock_fcntl(). Nothing is done if that value is -1, i.e. the
 * flags were never saved. A failure is logged, not fatal.
 */
static void
restore_fcntl(int sock, long fcntl_flags)
{
	if(fcntl_flags == -1)
		return;

	if(fcntl(sock, F_SETFL, fcntl_flags) != 0)
		cli_warnmsg("restore_fcntl: restoring: fcntl(%d, F_SETFL): errno=%d: %s\n",
			sock, errno, strerror(errno));
}
/*
 * connect() with an upper bound of 'secs' seconds.
 *
 * The connect() is issued once; if it reports that the connection is still
 * in progress we wait with select() until the socket becomes writable, the
 * deadline passes, select() fails too often or select() keeps returning
 * without anything to do.
 *
 * Returns 0 when connected, -1 on any failure or timeout.
 */
static int
nonblock_connect(int sock, const struct sockaddr *addr, socklen_t addrlen, int secs)
{
	int failures_left = NONBLOCK_SELECT_MAX_FAILURES;	/* tolerated select() errors */
	int idle_loops_left = NONBLOCK_MAX_BOGUS_LOOPS;	/* tolerated empty wake-ups */
	struct timeval deadline;	/* absolute time at which we give up */
	int numfd;			/* highest fd in the set, plus 1 */

	/* Work out the deadline before starting the connection */
	gettimeofday(&deadline, NULL);
	deadline.tv_sec += secs;

	/* Launch the (possibly) non-blocking connect() request */
	if(connect(sock, addr, addrlen) == 0)
		return connect_error(sock);
	else {
		const int err = errno;

		cli_dbgmsg("DEBUG nonblock_connect: connect(): fd=%d errno=%d: %s\n",
			sock, err, strerror(err));

		if(err == EISCONN)
			return 0;	/* connected */

		if((err != EALREADY) && (err != EINPROGRESS)) {
			cli_warnmsg("nonblock_connect: connect(): fd=%d errno=%d: %s\n",
				sock, err, strerror(err));
			return -1;	/* failed */
		}
		/* otherwise: wait for the connection to complete */
	}

	numfd = sock + 1;

	for(;;) {
		fd_set writable;
		struct timeval now;
		struct timeval remaining;
		int ready;

		/* Give up once the deadline has passed */
		gettimeofday(&now, NULL);
		if(timercmp(&now, &deadline, >)) {
			cli_warnmsg("connect timing out (%d secs)\n",
				secs);
			return -1;
		}

		/* remaining = deadline - now */
		timersub(&deadline, &now, &remaining);

		/* 'sock' is the only descriptor we are interested in */
		FD_ZERO(&writable);
		FD_SET(sock, &writable);

		ready = select(numfd, NULL, &writable, NULL, &remaining);
		if(ready < 0) {
			cli_warnmsg("nonblock_connect: select() failure %d: errno=%d: %s\n",
				failures_left, errno, strerror(errno));
			if(--failures_left >= 0)
				continue;	/* keep waiting */
			return -1;
		}

		cli_dbgmsg("DEBUG nonblock_connect: select = %d\n", ready);

		if(ready != 0)
			return connect_error(sock);

		/* select() returned, but there is no work to do... */
		if(--idle_loops_left < 0) {
			cli_warnmsg("nonblock_connect: giving up due to excessive bogus loops\n");
			return -1;
		}
	}
}
/*
 * Find out whether a connect() on 'sock' succeeded by reading the pending
 * socket error (SO_ERROR).
 *
 * Returns 0 if the socket reports no error, -1 if it reports one or if the
 * error status could not be retrieved at all.
 */
static int
connect_error(int sock)
{
	/*
	 * Pre-set to 0 and check getsockopt(): if the call fails optval
	 * would otherwise be read uninitialised.
	 */
	int optval = 0;
	socklen_t optlen = sizeof(optval);

	if(getsockopt(sock, SOL_SOCKET, SO_ERROR, &optval, &optlen) < 0) {
		cli_warnmsg("connect_error: getsockopt(SO_ERROR): fd=%d errno=%d: %s\n",
			sock, errno, strerror(errno));
		return -1;
	}

	if(optval) {
		cli_warnmsg("connect_error: getsockopt(SO_ERROR): fd=%d error=%d: %s\n",
			sock, optval, strerror(optval));
		return -1;
	}

	return 0;
}
# else
2006-09-21 09:37:47 +00:00
/*
 * Set by curlsegv() when SIGSEGV is delivered.  It is written from a signal
 * handler, so it must be a volatile sig_atomic_t for the store to be well
 * defined and visible to the interrupted code.
 */
static volatile sig_atomic_t curl_has_segfaulted;

/*
 * In spite of numerous bug reports, curl is still buggy :-(
 * For a fuller explanation, read the long comment at the top, including
 * the valgrind evidence
 *
 * SIGSEGV handler: only records that the signal arrived.
 */
static void
curlsegv(int sig)
{
	(void)sig;	/* the signal number is not needed */

	curl_has_segfaulted = 1;
}
2004-08-18 21:39:36 +00:00
static void *
# ifdef CL_THREAD_SAFE
getURL ( void * a )
# else
getURL ( struct arg * arg )
# endif
2004-08-10 08:17:19 +00:00
{
FILE * fp ;
2004-08-12 10:37:53 +00:00
struct curl_slist * headers ;
2004-08-18 21:39:36 +00:00
# ifdef CL_THREAD_SAFE
struct arg * arg = ( struct arg * ) a ;
# endif
const char * url = arg - > url ;
const char * dir = arg - > dir ;
2006-09-21 14:42:06 +00:00
CURL * curl = arg - > curl ;
2004-08-18 21:39:36 +00:00
const char * filename = arg - > filename ;
2004-11-27 21:56:41 +00:00
char fout [ NAME_MAX + 1 ] ;
2006-09-21 14:42:06 +00:00
void ( * oldsegv ) ( int ) ;
2005-04-07 16:38:37 +00:00
# ifdef CURLOPT_ERRORBUFFER
2006-09-20 10:24:17 +00:00
char errorbuffer [ CURL_ERROR_SIZE + 1 ] ;
2006-01-02 18:10:32 +00:00
# elif (LIBCURL_VERSION_NUM >= 0x070C00)
2005-12-28 13:51:36 +00:00
CURLcode res = CURLE_OK ;
2005-04-07 16:38:37 +00:00
# endif
2004-08-10 08:17:19 +00:00
2004-08-12 10:37:53 +00:00
( void ) curl_easy_setopt ( curl , CURLOPT_USERAGENT , " www.clamav.net " ) ;
2006-02-06 02:36:39 +00:00
if ( curl_easy_setopt ( curl , CURLOPT_URL , url ) ! = 0 ) {
2006-09-20 10:24:17 +00:00
cli_errmsg ( " %s: curl_easy_setopt failed \n " , url ) ;
2004-08-18 21:39:36 +00:00
return NULL ;
2006-02-06 02:36:39 +00:00
}
2004-08-12 10:37:53 +00:00
2006-09-20 10:24:17 +00:00
snprintf ( fout , sizeof ( fout ) - 1 , " %s/%s " , dir , filename ) ;
2004-08-10 08:17:19 +00:00
2006-09-21 14:42:06 +00:00
cli_dbgmsg ( " Saving %s to %s \n " , url , fout ) ;
2005-12-28 13:51:36 +00:00
fp = fopen ( fout , " wb " ) ;
2004-08-10 08:17:19 +00:00
if ( fp = = NULL ) {
2004-10-10 11:12:28 +00:00
cli_errmsg ( " Can't open '%s' for writing " , fout ) ;
2004-08-18 21:39:36 +00:00
return NULL ;
2004-08-10 08:17:19 +00:00
}
2004-09-21 12:22:07 +00:00
# ifdef CURLOPT_WRITEDATA
2004-08-18 21:39:36 +00:00
if ( curl_easy_setopt ( curl , CURLOPT_WRITEDATA , fp ) ! = 0 ) {
fclose ( fp ) ;
return NULL ;
}
2004-09-21 12:22:07 +00:00
# else
if ( curl_easy_setopt ( curl , CURLOPT_FILE , fp ) ! = 0 ) {
fclose ( fp ) ;
return NULL ;
}
# endif
2004-08-18 21:39:36 +00:00
2004-08-12 10:37:53 +00:00
/*
2004-08-18 15:56:38 +00:00
* If an item is in squid ' s cache get it from there ( TCP_HIT / 200 )
2004-08-12 10:37:53 +00:00
* by default curl doesn ' t ( TCP_CLIENT_REFRESH_MISS / 200 )
*/
headers = curl_slist_append ( NULL , " Pragma: " ) ;
curl_easy_setopt ( curl , CURLOPT_HTTPHEADER , headers ) ;
2004-08-10 08:17:19 +00:00
2004-08-12 10:37:53 +00:00
/* These should be customisable */
curl_easy_setopt ( curl , CURLOPT_TIMEOUT , 30 ) ;
curl_easy_setopt ( curl , CURLOPT_CONNECTTIMEOUT , 10 ) ;
2004-08-17 08:31:58 +00:00
# ifdef CURLOPT_MAXFILESIZE
curl_easy_setopt ( curl , CURLOPT_MAXFILESIZE , 50 * 1024 ) ;
# endif
2004-08-10 08:17:19 +00:00
2004-08-18 21:39:36 +00:00
# ifdef CL_THREAD_SAFE
2004-09-22 16:12:08 +00:00
# ifdef CURLOPT_DNS_USE_GLOBAL_CACHE
2006-09-21 14:42:06 +00:00
/* Apparently this is depracated */
/*curl_easy_setopt(curl, CURLOPT_DNS_USE_GLOBAL_CACHE, 0);*/
# endif
# endif
# ifdef CL_THREAD_SAFE
# ifdef CURLOPT_NOSIGNAL
curl_easy_setopt ( curl , CURLOPT_NOSIGNAL , 1 ) ;
2004-09-22 16:12:08 +00:00
# endif
2004-08-18 21:39:36 +00:00
# endif
2004-09-21 20:50:11 +00:00
/*
* Prevent password : prompting with older versions
* FIXME : a better username ?
*/
2004-09-22 15:24:52 +00:00
curl_easy_setopt ( curl , CURLOPT_USERPWD , " username:password " ) ;
2004-09-21 20:50:11 +00:00
2004-08-18 21:39:36 +00:00
/*
* FIXME : valgrind reports " pthread_mutex_unlock: mutex is not locked "
* from gethostbyaddr_r within this . It may be a bug in libcurl
* rather than this code , but I need to check , see Curl_resolv ( )
* If pushed really hard it will sometimes say
* Conditional jump or move depends on uninitialised value ( s ) and
* quit . But the program seems to work OK without valgrind . . .
2005-08-12 12:00:06 +00:00
* Perhaps Curl_resolv ( ) isn ' t thread safe ?
*
* I have seen segfaults in version 7.12 .3 . Version 7.14 seems OK .
2004-08-18 21:39:36 +00:00
*/
2004-11-18 18:09:08 +00:00
/*
2004-11-26 12:05:17 +00:00
* On some C libraries ( notably with FC3 , glibc - 2.3 .3 - 74 ) you get a
2006-02-06 02:36:39 +00:00
* memory leak here in getaddrinfo ( ) , see
2004-11-22 15:19:43 +00:00
* https : //bugzilla.redhat.com/bugzilla/show_bug.cgi?id=139559
2004-11-18 18:09:08 +00:00
*/
2006-09-21 14:42:06 +00:00
curl_has_segfaulted = 0 ;
oldsegv = signal ( SIGSEGV , curlsegv ) ;
2005-04-07 16:38:37 +00:00
# ifdef CURLOPT_ERRORBUFFER
2006-01-02 18:10:32 +00:00
curl_easy_setopt ( curl , CURLOPT_ERRORBUFFER , errorbuffer ) ;
2005-12-28 13:51:37 +00:00
if ( curl_easy_perform ( curl ) ! = CURLE_OK )
2005-04-07 16:38:37 +00:00
cli_warnmsg ( " URL %s failed to download: %s \n " , url , errorbuffer ) ;
2006-01-02 18:03:31 +00:00
# elif (LIBCURL_VERSION_NUM >= 0x070C00)
2005-12-28 13:51:37 +00:00
if ( ( res = curl_easy_perform ( curl ) ) ! = CURLE_OK )
2005-12-28 13:51:36 +00:00
cli_warnmsg ( " URL %s failed to download: %s \n " , url ,
curl_easy_strerror ( res ) ) ;
2006-01-02 18:03:31 +00:00
# else
2006-01-02 18:10:32 +00:00
if ( curl_easy_perform ( curl ) ! = CURLE_OK )
2006-01-02 18:03:31 +00:00
cli_warnmsg ( " URL %s failed to download \n " , url ) ;
2005-04-07 16:38:37 +00:00
# endif
2004-08-12 10:37:53 +00:00
fclose ( fp ) ;
2005-08-03 21:15:19 +00:00
curl_slist_free_all ( headers ) ;
2004-08-18 21:39:36 +00:00
2006-09-21 14:42:06 +00:00
if ( curl_has_segfaulted )
cli_warnmsg ( " Libcurl has segfaulted on '%s' \n " , url ) ;
signal ( SIGSEGV , oldsegv ) ;
2004-08-18 21:39:36 +00:00
return NULL ;
2004-08-10 08:17:19 +00:00
}
2006-09-21 16:40:51 +00:00
# endif
2006-09-21 09:37:47 +00:00
2004-08-10 08:17:19 +00:00
# endif
# endif
2004-06-28 11:47:16 +00:00
# ifdef HAVE_BACKTRACE
2004-08-11 14:48:13 +00:00
/*
 * SIGSEGV handler: log a backtrace to syslog and terminate the process
 * with SIGSEGV as its exit status.
 */
static void
sigsegv(int sig)
{
	(void)sig;	/* only ever installed for SIGSEGV */

	/*
	 * Go back to the default action first, so that a second fault
	 * while the trace is being produced kills the process instead of
	 * re-entering this handler.
	 */
	(void)signal(SIGSEGV, SIG_DFL);

	print_trace(1);		/* 1 = report via syslog */

	exit(SIGSEGV);
}
2004-08-11 14:48:13 +00:00
/*
 * Print a backtrace (at most 10 frames) of the calling thread.
 *
 * use_syslog:	non-zero sends the trace to syslog at LOG_ERR, zero sends it
 *		to cli_dbgmsg().
 */
static void
print_trace(int use_syslog)
{
	void *array[10];
	int nframes;
	char **strings;
	size_t i;
	const pid_t pid = getpid();

	/* backtrace() returns an int frame count */
	nframes = backtrace(array, 10);
	strings = backtrace_symbols(array, nframes);

	if(use_syslog == 0)
		cli_dbgmsg("Backtrace of pid %d:\n", (int)pid);
	else
		syslog(LOG_ERR, "Backtrace of pid %d:", (int)pid);

	/* backtrace_symbols() allocates and may fail: nothing to list then */
	if(strings == NULL)
		return;

	for(i = 0; i < (size_t)nframes; i++)
		if(use_syslog)
			/* cast: %u expects unsigned int, i is a size_t */
			syslog(LOG_ERR, "bt[%u]: %s", (unsigned int)i, strings[i]);
		else
			cli_dbgmsg("%s\n", strings[i]);

	/* TODO: dump the current email */

	/* Only the array itself is freed, not the strings it points to */
	free(strings);
}
# endif
2004-12-16 15:29:08 +00:00
2006-08-25 10:10:13 +00:00
/* See also clamav-milter */
2004-12-18 16:34:31 +00:00
static bool
usefulHeader ( int commandNumber , const char * cmd )
{
switch ( commandNumber ) {
case CONTENT_TRANSFER_ENCODING :
case CONTENT_DISPOSITION :
case CONTENT_TYPE :
return TRUE ;
default :
if ( strcasecmp ( cmd , " From " ) = = 0 )
return TRUE ;
2006-08-25 10:10:13 +00:00
if ( strcasecmp ( cmd , " Received " ) = = 0 )
2004-12-18 16:34:31 +00:00
return TRUE ;
2006-08-25 10:10:13 +00:00
if ( strcasecmp ( cmd , " De " ) = = 0 )
2004-12-18 16:34:31 +00:00
return TRUE ;
}
return FALSE ;
}
2005-04-19 09:23:12 +00:00
/*
 * Like fgets but cope with end of line by "\n", "\r\n", "\n\r", "\r"
 *
 * buffer:	where the line is stored
 * len:		size of buffer in bytes, including room for the terminator
 * fin:		stream to read from
 *
 * Whatever line ending is found is stored as a single '\n'.  A line that
 * does not fit is returned in pieces over successive calls.
 *
 * Returns buffer on success, or NULL at end of file, on a read error, on
 * invalid arguments, or when the buffer is too small to hold even one
 * character plus the terminator.
 */
static char *
getline_from_mbox(char *buffer, size_t len, FILE *fin)
{
	char *ret;
	const size_t capacity = len;

	if(feof(fin))
		return NULL;

	if((len == 0) || (buffer == NULL)) {
		cli_errmsg("Invalid call to getline_from_mbox(). Refer to http://www.clamav.net/bugs.html#pagestart\n");
		return NULL;
	}

	ret = buffer;

	do {
		int c = getc(fin);

		if(ferror(fin))
			return NULL;

		switch(c) {
			case '\n':
				*buffer++ = '\n';
				/* swallow the '\r' of a "\n\r" pair */
				c = getc(fin);
				if((c != '\r') && !feof(fin))
					ungetc(c, fin);
				break;
			default:
				*buffer++ = (char)c;
				/* jumps to the --len test below */
				continue;
			case EOF:
				break;
			case '\r':
				*buffer++ = '\n';
				/* swallow the '\n' of a "\r\n" pair */
				c = getc(fin);
				if((c != '\n') && !feof(fin))
					ungetc(c, fin);
				break;
		}
		break;
	} while(--len > 1);

	/*
	 * No room left for the terminator.  The second test catches a
	 * one byte buffer whose only byte was just used for a line ending;
	 * writing the '\0' would land one past the end of the buffer.
	 */
	if((len == 0) || ((size_t)(buffer - ret) >= capacity)) {
		/* the email probably breaks RFC821 */
		cli_warnmsg("getline_from_mbox: buffer overflow stopped, line lost\n");
		return NULL;
	}
	*buffer = '\0';

	if(len == 1)
		/* overflows will have appeared on separate lines */
		cli_dbgmsg("getline_from_mbox: buffer overflow stopped, line recovered\n");

	return ret;
}
2006-05-02 15:21:59 +00:00
2006-05-03 15:41:44 +00:00
/*
* Is this line a candidate for the start of a bounce message ?
*/
2006-05-02 15:21:59 +00:00
static bool
2006-05-03 15:41:44 +00:00
isBounceStart ( const char * line )
2006-05-02 15:21:59 +00:00
{
if ( line = = NULL )
return FALSE ;
if ( * line = = ' \0 ' )
return FALSE ;
2006-05-04 17:44:29 +00:00
/*if((strncmp(line, "From ", 5) == 0) && !isalnum(line[5]))
2006-05-02 15:21:59 +00:00
return FALSE ;
if ( ( strncmp ( line , " >From " , 6 ) = = 0 ) & & ! isalnum ( line [ 6 ] ) )
2006-05-04 17:44:29 +00:00
return FALSE ; */
2006-05-02 15:21:59 +00:00
if ( cli_filetype ( line , strlen ( line ) ) ! = CL_TYPE_MAIL )
return FALSE ;
if ( ( strncmp ( line , " From " , 5 ) = = 0 ) | |
( strncmp ( line , " >From " , 6 ) = = 0 ) ) {
int numSpaces = 0 , numDigits = 0 ;
do
if ( * line = = ' ' )
numSpaces + + ;
else if ( isdigit ( * line ) )
numDigits + + ;
while ( * + + line ! = ' \0 ' ) ;
if ( numSpaces < 6 )
return FALSE ;
if ( numDigits < 11 )
return FALSE ;
}
return TRUE ;
}
2006-06-28 16:06:07 +00:00
/*
* Extract a binhexEncoded message , return if it ' s found to be infected as we
* extract it
*/
static bool
2006-07-04 08:40:46 +00:00
exportBinhexMessage ( const char * dir , message * m )
2006-06-28 16:06:07 +00:00
{
bool infected = FALSE ;
fileblob * fb ;
if ( messageGetEncoding ( m ) = = NOENCODING )
messageSetEncoding ( m , " x-binhex " ) ;
2006-07-01 16:21:03 +00:00
fb = messageToFileblob ( m , dir , 0 ) ;
2006-06-28 16:06:07 +00:00
if ( fb ) {
if ( fileblobContainsVirus ( fb ) )
infected = TRUE ;
cli_dbgmsg ( " Binhex file decoded to %s \n " ,
fileblobGetFilename ( fb ) ) ;
fileblobDestroy ( fb ) ;
} else
cli_errmsg ( " Couldn't decode binhex file to %s \n " , dir ) ;
return infected ;
}
2006-06-28 21:07:36 +00:00
2006-07-04 08:40:46 +00:00
/*
 * Locate any bounce message and extract it. Return 1 if anything found
 *
 * Attempt to save the original (unbounced) message - clamscan will find
 * that in the directory and call us again (with any luck) having found an
 * e-mail message to handle.
 *
 * This finds a lot of false positives, the search that a content type is
 * in the bounce (i.e. it's after the bounce header) helps a bit.
 *
 * messageAddLine optimisation could help here, but needs careful thought,
 * do it with line numbers would be best, since the current method in
 * messageAddLine of checking encoding first must remain otherwise non
 * bounce messages won't be scanned
 */
static int
exportBounceMessage(text *start, const mbox_ctx *mctx)
{
	int rc = 0;
	text *t = start;
	fileblob *fb;

	while(t != NULL) {
		char cmd[RFC2821LENGTH + 1];
		const char *txt = lineGetData(t->t_line);
		int stop = 0;	/* set once the scan has reached a verdict */

		if((txt != NULL) && (cli_strtokbuf(txt, 0, ":", cmd) != NULL)) {
			switch(tableFind(mctx->rfc821Table, cmd)) {
				case CONTENT_TRANSFER_ENCODING:
					/*
					 * 7bit and 8bit tell us nothing, keep
					 * looking; anything else ends the scan
					 */
					if((strstr(txt, "7bit") == NULL) &&
					   (strstr(txt, "8bit") == NULL))
						stop = 1;
					break;
				case CONTENT_DISPOSITION:
					stop = 1;
					break;
				case CONTENT_TYPE:
					/* plain text: treat as nothing found */
					if(strstr(txt, "text/plain") != NULL)
						t = NULL;
					stop = 1;
					break;
				default:
					/*
					 * Remember the most recent From or
					 * Received line, the saved text
					 * starts there
					 */
					if((strcasecmp(cmd, "From") == 0) ||
					   (strcasecmp(cmd, "Received") == 0))
						start = t;
					break;
			}
		}

		if(stop)
			break;

		t = t->t_next;
	}

	if((t != NULL) && ((fb = fileblobCreate()) != NULL)) {
		cli_dbgmsg("Found a bounce message\n");
		fileblobSetFilename(fb, mctx->dir, "bounce");
		/*fileblobSetCTX(fb, mctx->ctx);*/
		if(textToFileblob(start, fb, 1) == NULL)
			cli_dbgmsg("Nothing new to save in the bounce message\n");
		else
			rc = 1;
		fileblobDestroy(fb);
	} else
		cli_dbgmsg("Not found a bounce message\n");

	return rc;
}
2006-06-28 21:07:36 +00:00
/*
 * Handle the ith element of a number of multiparts, e.g. multipart/alternative
 *
 * mainMessage:	the message being built up so far, may be NULL
 * messages:	the array of parts; messages[i] is consumed here and is set
 *		to NULL once it has been destroyed
 * i:		index of the part to handle
 * rc:		in/out result code; set to 3 when infected content is seen
 *		and to the result of parseEmailBody() for nested parts
 * mctx:	scan context (output directory, lookup tables, options)
 * messageIn:	the caller's original message, which must never be destroyed
 *		here even when it is the current mainMessage
 * tptr:	in/out accumulated text of the non-attachment parts
 *
 * Returns the (possibly changed) mainMessage; NULL if it has been dropped.
 */
static message *
do_multipart(message *mainMessage, message **messages, int i, int *rc, mbox_ctx *mctx, message *messageIn, text **tptr)
{
	bool addToText = FALSE;
	const char *dtype;
#ifndef	SAVE_TO_DISC
	message *body;
#endif
	message *aMessage = messages[i];

	if(aMessage == NULL)
		return mainMessage;

	cli_dbgmsg("Mixed message part %d is of type %d\n",
		i, messageGetMimeType(aMessage));

	switch(messageGetMimeType(aMessage)) {
		case APPLICATION:
		case AUDIO:
		case IMAGE:
		case VIDEO:
			/* saved as an attachment below */
			break;
		case NOMIME:
			cli_dbgmsg("No mime headers found in multipart part %d\n", i);
			if(mainMessage) {
				if(binhexBegin(aMessage)) {
					cli_dbgmsg("Found binhex message in multipart/mixed mainMessage\n");

					if(exportBinhexMessage(mctx->dir, mainMessage))
						*rc = 3;
				}
				if(mainMessage != messageIn)
					messageDestroy(mainMessage);
				mainMessage = NULL;
			} else if(aMessage) {
				if(binhexBegin(aMessage)) {
					cli_dbgmsg("Found binhex message in multipart/mixed non mime part\n");
					if(exportBinhexMessage(mctx->dir, aMessage))
						*rc = 3;
					assert(aMessage == messages[i]);
					messageReset(messages[i]);
				}
			}
			addToText = TRUE;
			if(messageGetBody(aMessage) == NULL)
				/*
				 * No plain text version
				 */
				cli_dbgmsg("No plain text alternative\n");
			break;
		case TEXT:
			dtype = messageGetDispositionType(aMessage);
			cli_dbgmsg("Mixed message text part disposition \"%s\"\n",
				dtype);
			if(strcasecmp(dtype, "attachment") == 0)
				break;
			if((*dtype == '\0') || (strcasecmp(dtype, "inline") == 0)) {
				const char *cptr;

				if(mainMessage && (mainMessage != messageIn))
					messageDestroy(mainMessage);
				mainMessage = NULL;
				cptr = messageGetMimeSubtype(aMessage);
				cli_dbgmsg("Mime subtype \"%s\"\n", cptr);
				if((tableFind(mctx->subtypeTable, cptr) == PLAIN) &&
				   (messageGetEncoding(aMessage) == NOENCODING)) {
					char *filename;
					/*
					 * Strictly speaking a text/plain part
					 * is not an attachment. We pretend it
					 * is so that we can decode and scan it
					 */
					filename = (char *)messageFindArgument(aMessage, "filename");
					if(filename == NULL)
						filename = (char *)messageFindArgument(aMessage, "name");

					if(filename == NULL) {
						cli_dbgmsg("Adding part to main message\n");
						addToText = TRUE;
					} else {
						cli_dbgmsg("Treating %s as attachment\n",
							filename);
						free(filename);
					}
				} else {
					const int is_html = (tableFind(mctx->subtypeTable, cptr) == HTML);
					if((mctx->ctx->options&CL_SCAN_MAILURL) && is_html)
						checkURLs(aMessage, mctx, rc, 1);
#ifdef	CL_EXPERIMENTAL
					else if(!(mctx->ctx->options&CL_SCAN_NOPHISHING))
						checkURLs(aMessage, mctx, rc, is_html);
#endif
					messageAddArgument(aMessage,
						"filename=mixedtextportion");
				}
				break;
			}
			cli_dbgmsg("Text type %s is not supported\n", dtype);
			return mainMessage;
		case MESSAGE:
			/* Content-Type: message/rfc822 */
			cli_dbgmsg("Found message inside multipart (encoding type %d)\n",
				messageGetEncoding(aMessage));
#ifndef	SCAN_UNENCODED_BOUNCES
			switch(messageGetEncoding(aMessage)) {
				case NOENCODING:
				case EIGHTBIT:
				case BINARY:
					if(encodingLine(aMessage) == NULL) {
						/*
						 * This means that the message
						 * has no attachments
						 *
						 * The test for
						 * messageGetEncoding is needed
						 * since encodingLine won't have
						 * been set if the message
						 * itself has been encoded
						 */
						cli_dbgmsg("Unencoded multipart/message will not be scanned\n");
						assert(aMessage == messages[i]);
						messageDestroy(messages[i]);
						messages[i] = NULL;
						return mainMessage;
					}
					/* FALLTHROUGH */
				default:
					cli_dbgmsg("Encoded multipart/message will be scanned\n");
			}
#endif
#if	0
			messageAddStrAtTop(aMessage,
				"Received: by clamd (message/rfc822)");
#endif
#ifdef	SAVE_TO_DISC
			/*
			 * Save this embedded message
			 * to a temporary file
			 */
			saveTextPart(aMessage, mctx->dir, 1);
			assert(aMessage == messages[i]);
			messageDestroy(messages[i]);
			messages[i] = NULL;
#else
			/*
			 * Scan in memory, faster but is open to DoS attacks
			 * when many nested levels are involved.
			 */
			body = parseEmailHeaders(aMessage, mctx->rfc821Table,
				TRUE);

			/*
			 * We've finished with the original copy of the
			 * message, so throw that away and deal with the
			 * encapsulated message as a message.
			 * This can save a lot of memory
			 */
			assert(aMessage == messages[i]);
			messageDestroy(messages[i]);
			messages[i] = NULL;
			if(body) {
				/*
				 * The scan context lives in mctx, and the
				 * result must be stored through rc, not
				 * assigned to the pointer itself
				 */
				messageSetCTX(body, mctx->ctx);
				*rc = parseEmailBody(body, NULL, mctx);
				if(messageContainsVirus(body))
					*rc = 3;
				messageDestroy(body);
			}
#endif
			return mainMessage;
		case MULTIPART:
			/*
			 * It's a multi part within a multi part
			 * Run the message parser on this bit, it won't
			 * be an attachment
			 */
			cli_dbgmsg("Found multipart inside multipart\n");
			if(aMessage) {
				/*
				 * The headers were parsed when reading in the
				 * whole multipart section
				 */
				*rc = parseEmailBody(aMessage, *tptr, mctx);
				cli_dbgmsg("Finished recursion\n");
				assert(aMessage == messages[i]);
				messageDestroy(messages[i]);
				messages[i] = NULL;
			} else {
				/*
				 * aMessage was checked against NULL on entry,
				 * so this branch is purely defensive
				 */
				*rc = parseEmailBody(NULL, NULL, mctx);
				if(mainMessage && (mainMessage != messageIn))
					messageDestroy(mainMessage);
				mainMessage = NULL;
			}
			return mainMessage;
		default:
			cli_warnmsg("Only text and application attachments are supported, type = %d\n",
				messageGetMimeType(aMessage));
			return mainMessage;
	}

	if(addToText) {
		cli_dbgmsg("Adding to non mime-part\n");
		*tptr = textAdd(*tptr, messageGetBody(aMessage));
	} else {
		/* anything else is written out as an attachment */
		fileblob *fb = messageToFileblob(aMessage, mctx->dir, 1);

		if(fb) {
			if(fileblobContainsVirus(fb))
				*rc = 3;
			fileblobDestroy(fb);
		}
	}
	if(messageContainsVirus(aMessage))
		*rc = 3;
	messageDestroy(aMessage);
	messages[i] = NULL;

	return mainMessage;
}
2006-07-12 21:21:25 +00:00
/*
 * Returns the number of double quote characters (") in the given
 * NUL terminated string
 */
static int
count_quotes(const char *buf)
{
	int total = 0;
	const char *p;

	for(p = buf; *p != '\0'; p++)
		if(*p == '"')
			total++;

	return total;
}
2006-07-24 12:14:46 +00:00
/*
* Will the next line be a folded header ? See RFC2822 section 2.2 .3
*/
static bool
next_is_folded_header ( const text * t )
{
const text * next = t - > t_next ;
const char * data , * ptr ;
if ( next = = NULL )
return FALSE ;
if ( next - > t_line = = NULL )
return FALSE ;
data = lineGetData ( next - > t_line ) ;
/*
2006-07-30 10:10:40 +00:00
* Section B .2 of RFC822 says TAB or SPACE means a continuation of the
2006-07-24 12:14:46 +00:00
* previous entry .
*/
if ( isblank ( data [ 0 ] ) )
return TRUE ;
if ( strchr ( data , ' = ' ) = = NULL )
/*
* Avoid false positives with
* Content - Type : text / html ;
* Content - Transfer - Encoding : quoted - printable
*/
return FALSE ;
2006-07-25 15:09:45 +00:00
2006-07-24 12:14:46 +00:00
/*
* Some are broken and don ' t fold headers lines
* correctly as per section 2.2 .3 of RFC2822 .
* Generally they miss the white space at
* the start of the fold line :
* Content - Type : multipart / related ;
* type = " multipart/alternative " ;
* boundary = " ----=_NextPart_000_006A_01C6AC47.348CB550 "
* should read :
* Content - Type : multipart / related ;
* type = " multipart/alternative " ;
* boundary = " ----=_NextPart_000_006A_01C6AC47.348CB550 "
* Since we ' re a virus checker not an RFC
* verifier we need to handle these
*/
data = lineGetData ( t - > t_line ) ;
ptr = strchr ( data , ' \0 ' ) ;
while ( - - ptr > data )
switch ( * ptr ) {
case ' ; ' :
return TRUE ;
case ' \n ' :
case ' ' :
case ' \r ' :
case ' \t ' :
continue ; /* white space at end of line */
default :
return FALSE ;
}
return FALSE ;
}