2004-07-02 23:00:58 +00:00
/*
 *  Copyright (C) 2007-2013 Sourcefire, Inc.
 *
 *  Authors: Tomasz Kojm
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 *  MA 02110-1301, USA.
 */
# if HAVE_CONFIG_H
# include "clamav-config.h"
# endif
# include <stdio.h>
# include <string.h>
# include <stdlib.h>
2006-12-20 15:33:55 +00:00
# include <sys/types.h>
2006-11-15 15:26:54 +00:00
# ifdef HAVE_UNISTD_H
# include <unistd.h>
# endif
2004-07-02 23:00:58 +00:00
# include "clamav.h"
# include "filetypes.h"
2004-07-19 17:54:40 +00:00
# include "others.h"
# include "readdb.h"
2005-11-11 01:31:46 +00:00
# include "matcher-ac.h"
2006-10-25 15:40:47 +00:00
# include "str.h"
2008-01-07 14:20:38 +00:00
# include "textdet.h"
2008-12-29 17:55:30 +00:00
# include "default.h"
2012-09-14 11:42:16 -04:00
# include "iowrap.h"
2014-02-06 19:01:26 -05:00
# include "mbr.h"
2004-07-02 23:00:58 +00:00
2006-12-26 16:17:02 +00:00
# include "htmlnorm.h"
# include "entconv.h"
2008-10-21 23:55:11 +00:00
# include "mpool.h"
2013-10-17 16:54:21 -04:00
# define UNZIP_PRIVATE
# include "unzip.h"
2006-12-26 16:17:02 +00:00
2007-12-14 22:39:37 +00:00
static const struct ftmap_s {
const char * name ;
cli_file_t code ;
} ftmap [ ] = {
2008-01-07 14:20:38 +00:00
{ " CL_TYPE_TEXT_ASCII " , CL_TYPE_TEXT_ASCII } ,
{ " CL_TYPE_TEXT_UTF8 " , CL_TYPE_TEXT_UTF8 } ,
{ " CL_TYPE_TEXT_UTF16LE " , CL_TYPE_TEXT_UTF16LE } ,
{ " CL_TYPE_TEXT_UTF16BE " , CL_TYPE_TEXT_UTF16BE } ,
{ " CL_TYPE_BINARY_DATA " , CL_TYPE_BINARY_DATA } ,
2007-12-14 22:39:37 +00:00
{ " CL_TYPE_IGNORED " , CL_TYPE_IGNORED } ,
2010-01-07 18:26:12 +01:00
{ " CL_TYPE_ANY " , CL_TYPE_ANY } ,
2007-12-14 22:39:37 +00:00
{ " CL_TYPE_MSEXE " , CL_TYPE_MSEXE } ,
{ " CL_TYPE_ELF " , CL_TYPE_ELF } ,
2009-07-08 15:05:22 +02:00
{ " CL_TYPE_MACHO " , CL_TYPE_MACHO } ,
2009-07-14 18:19:54 +02:00
{ " CL_TYPE_MACHO_UNIBIN " , CL_TYPE_MACHO_UNIBIN } ,
2007-12-14 22:39:37 +00:00
{ " CL_TYPE_POSIX_TAR " , CL_TYPE_POSIX_TAR } ,
{ " CL_TYPE_OLD_TAR " , CL_TYPE_OLD_TAR } ,
2009-07-06 16:15:33 +02:00
{ " CL_TYPE_CPIO_OLD " , CL_TYPE_CPIO_OLD } ,
{ " CL_TYPE_CPIO_ODC " , CL_TYPE_CPIO_ODC } ,
{ " CL_TYPE_CPIO_NEWC " , CL_TYPE_CPIO_NEWC } ,
{ " CL_TYPE_CPIO_CRC " , CL_TYPE_CPIO_CRC } ,
2007-12-14 22:39:37 +00:00
{ " CL_TYPE_GZ " , CL_TYPE_GZ } ,
{ " CL_TYPE_ZIP " , CL_TYPE_ZIP } ,
{ " CL_TYPE_BZ " , CL_TYPE_BZ } ,
{ " CL_TYPE_RAR " , CL_TYPE_RAR } ,
{ " CL_TYPE_ARJ " , CL_TYPE_ARJ } ,
{ " CL_TYPE_MSSZDD " , CL_TYPE_MSSZDD } ,
{ " CL_TYPE_MSOLE2 " , CL_TYPE_MSOLE2 } ,
{ " CL_TYPE_MSCAB " , CL_TYPE_MSCAB } ,
{ " CL_TYPE_MSCHM " , CL_TYPE_MSCHM } ,
{ " CL_TYPE_SIS " , CL_TYPE_SIS } ,
{ " CL_TYPE_SCRENC " , CL_TYPE_SCRENC } ,
{ " CL_TYPE_GRAPHICS " , CL_TYPE_GRAPHICS } ,
{ " CL_TYPE_RIFF " , CL_TYPE_RIFF } ,
{ " CL_TYPE_BINHEX " , CL_TYPE_BINHEX } ,
{ " CL_TYPE_TNEF " , CL_TYPE_TNEF } ,
{ " CL_TYPE_CRYPTFF " , CL_TYPE_CRYPTFF } ,
{ " CL_TYPE_PDF " , CL_TYPE_PDF } ,
{ " CL_TYPE_UUENCODED " , CL_TYPE_UUENCODED } ,
{ " CL_TYPE_HTML_UTF16 " , CL_TYPE_HTML_UTF16 } ,
2008-02-04 21:38:34 +00:00
{ " CL_TYPE_SCRIPT " , CL_TYPE_SCRIPT } ,
2007-12-14 22:39:37 +00:00
{ " CL_TYPE_RTF " , CL_TYPE_RTF } ,
{ " CL_TYPE_HTML " , CL_TYPE_HTML } ,
{ " CL_TYPE_MAIL " , CL_TYPE_MAIL } ,
{ " CL_TYPE_SFX " , CL_TYPE_SFX } ,
{ " CL_TYPE_ZIPSFX " , CL_TYPE_ZIPSFX } ,
{ " CL_TYPE_RARSFX " , CL_TYPE_RARSFX } ,
{ " CL_TYPE_CABSFX " , CL_TYPE_CABSFX } ,
{ " CL_TYPE_ARJSFX " , CL_TYPE_ARJSFX } ,
{ " CL_TYPE_NULSFT " , CL_TYPE_NULSFT } ,
{ " CL_TYPE_AUTOIT " , CL_TYPE_AUTOIT } ,
2009-07-13 01:02:13 +02:00
{ " CL_TYPE_ISHIELD_MSI " , CL_TYPE_ISHIELD_MSI } ,
2009-08-06 18:22:46 +02:00
{ " CL_TYPE_7Z " , CL_TYPE_7Z } ,
2011-11-01 12:27:46 +01:00
{ " CL_TYPE_7ZSFX " , CL_TYPE_7ZSFX } ,
2011-03-28 20:25:40 +02:00
{ " CL_TYPE_SWF " , CL_TYPE_SWF } ,
2011-11-14 21:23:15 +01:00
{ " CL_TYPE_ISO9660 " , CL_TYPE_ISO9660 } ,
2013-02-07 14:08:03 -05:00
{ " CL_TYPE_JAVA " , CL_TYPE_JAVA } ,
2013-08-27 13:44:11 -04:00
{ " CL_TYPE_DMG " , CL_TYPE_DMG } ,
2014-02-06 19:01:26 -05:00
{ " CL_TYPE_MBR " , CL_TYPE_MBR } ,
2014-01-24 14:24:56 -05:00
{ " CL_TYPE_GPT " , CL_TYPE_GPT } ,
2014-02-06 19:01:26 -05:00
{ " CL_TYPE_APM " , CL_TYPE_APM } ,
2013-08-27 13:44:11 -04:00
{ " CL_TYPE_XAR " , CL_TYPE_XAR } ,
2013-09-17 16:45:48 -04:00
{ " CL_TYPE_PART_ANY " , CL_TYPE_PART_ANY } ,
{ " CL_TYPE_PART_HFSPLUS " , CL_TYPE_PART_HFSPLUS } ,
2013-10-08 17:17:44 -04:00
{ " CL_TYPE_XZ " , CL_TYPE_XZ } ,
2013-10-17 16:54:21 -04:00
{ " CL_TYPE_OOXML_WORD " , CL_TYPE_OOXML_WORD } ,
{ " CL_TYPE_OOXML_PPT " , CL_TYPE_OOXML_PPT } ,
{ " CL_TYPE_OOXML_XL " , CL_TYPE_OOXML_XL } ,
2008-01-07 14:20:38 +00:00
{ NULL , CL_TYPE_IGNORED }
2004-07-02 23:00:58 +00:00
} ;
2007-12-14 22:39:37 +00:00
cli_file_t cli_ftcode ( const char * name )
{
unsigned int i ;
2004-07-02 23:00:58 +00:00
2007-12-14 22:39:37 +00:00
for ( i = 0 ; ftmap [ i ] . name ; i + + )
if ( ! strcmp ( ftmap [ i ] . name , name ) )
return ftmap [ i ] . code ;
2004-07-02 23:00:58 +00:00
2007-12-14 22:39:37 +00:00
return CL_TYPE_ERROR ;
}
2004-07-02 23:00:58 +00:00
2011-03-04 18:27:32 +01:00
/*
 * Translate a cli_file_t code back into its name string.
 *
 * Walks ftmap up to the NULL-name sentinel and returns the name of the first
 * entry carrying `code`.  Returns NULL when the code is not in the table.
 * The returned pointer refers to the table's own string; it is not a copy.
 */
const char *cli_ftname(cli_file_t code)
{
    const struct ftmap_s *entry = ftmap;

    while (entry->name != NULL) {
        if (entry->code == code)
            return entry->name;
        entry++;
    }

    return NULL;
}
2008-10-19 16:16:49 +00:00
void cli_ftfree ( const struct cl_engine * engine )
2007-12-14 22:39:37 +00:00
{
2008-10-19 16:16:49 +00:00
struct cli_ftype * ftypes = engine - > ftypes , * pt ;
2007-12-14 22:39:37 +00:00
while ( ftypes ) {
pt = ftypes ;
ftypes = ftypes - > next ;
2009-01-26 19:47:02 +00:00
mpool_free ( engine - > mempool , pt - > magic ) ;
mpool_free ( engine - > mempool , pt - > tname ) ;
mpool_free ( engine - > mempool , pt ) ;
2007-12-14 22:39:37 +00:00
}
2013-09-17 16:45:48 -04:00
ftypes = engine - > ptypes ;
while ( ftypes ) {
pt = ftypes ;
ftypes = ftypes - > next ;
mpool_free ( engine - > mempool , pt - > magic ) ;
mpool_free ( engine - > mempool , pt - > tname ) ;
mpool_free ( engine - > mempool , pt ) ;
}
}
cli_file_t cli_partitiontype ( const unsigned char * buf , size_t buflen , const struct cl_engine * engine )
{
struct cli_ftype * ptype = engine - > ptypes ;
while ( ptype ) {
if ( ptype - > offset + ptype - > length < = buflen ) {
if ( ! memcmp ( buf + ptype - > offset , ptype - > magic , ptype - > length ) ) {
cli_dbgmsg ( " Recognized %s partition \n " , ptype - > tname ) ;
return ptype - > type ;
}
}
ptype = ptype - > next ;
}
return CL_TYPE_PART_ANY ;
2007-12-14 22:39:37 +00:00
}
2005-03-29 00:55:06 +00:00
2007-12-14 22:39:37 +00:00
cli_file_t cli_filetype ( const unsigned char * buf , size_t buflen , const struct cl_engine * engine )
2004-07-02 23:00:58 +00:00
{
2007-12-14 22:39:37 +00:00
struct cli_ftype * ftype = engine - > ftypes ;
2004-08-24 00:50:29 +00:00
2004-07-02 23:00:58 +00:00
2007-12-14 22:39:37 +00:00
while ( ftype ) {
if ( ftype - > offset + ftype - > length < = buflen ) {
if ( ! memcmp ( buf + ftype - > offset , ftype - > magic , ftype - > length ) ) {
cli_dbgmsg ( " Recognized %s file \n " , ftype - > tname ) ;
return ftype - > type ;
2004-07-02 23:00:58 +00:00
}
}
2007-12-14 22:39:37 +00:00
ftype = ftype - > next ;
2004-07-02 23:00:58 +00:00
}
2008-01-07 14:20:38 +00:00
return cli_texttype ( buf , buflen ) ;
2004-07-02 23:00:58 +00:00
}
2012-01-05 14:16:09 +02:00
int is_tar ( const unsigned char * buf , unsigned int nbytes ) ;
2005-03-22 21:26:27 +00:00
2013-09-17 16:45:48 -04:00
/*
 * Determine the type of the data behind an fmap.
 *
 * map      - mapping of the data to classify (dereferenced unconditionally)
 * engine   - scanning engine; NULL yields CL_TYPE_ERROR
 * basetype - CL_TYPE_PART_ANY to type a partition, anything else to type a
 *            file
 *
 * Steps:
 *  1. Map up to MAGIC_BUFFER_SIZE leading bytes and run the magic-signature
 *     lookup (cli_partitiontype() or cli_filetype()).
 *  2. For files: binary data is probed with is_tar(); ZIP data is searched
 *     for local-file headers whose names begin with "xl/", "ppt/" or "word/"
 *     (OOXML); an MBR hit is re-checked for a GPT signature and validated
 *     with cli_mbr_check(), falling back to binary data if neither holds.
 *  3. If the result lies in the text/binary range, the file-type AC
 *     signatures are run over the raw bytes, then over a UTF-16-to-ASCII
 *     conversion, and finally (when entity conversion is enabled in dconf)
 *     over a BOM-detected, ASCII-normalised copy to spot HTML.
 *
 * Returns the detected type, or CL_TYPE_ERROR on a mapping/read failure.
 */
cli_file_t cli_filetype2(fmap_t *map, const struct cl_engine *engine, cli_file_t basetype)
{
    unsigned char buffer[MAGIC_BUFFER_SIZE];
    const unsigned char *buff;
    int bread, sret;
    cli_file_t ret = CL_TYPE_BINARY_DATA;
    struct cli_matcher *root;
    struct cli_ac_data mdata;

    if (!engine) {
        cli_errmsg("cli_filetype2: engine == NULL\n");
        return CL_TYPE_ERROR;
    }

    if (basetype == CL_TYPE_PART_ANY) {
        bread = MIN(map->len, CL_PART_MBUFF_SIZE);
    } else {
        bread = MIN(map->len, CL_FILE_MBUFF_SIZE);
    }
    if (bread > MAGIC_BUFFER_SIZE) {
        /* Save anyone who tampered with the header */
        bread = MAGIC_BUFFER_SIZE;
    }

    buff = fmap_need_off_once(map, 0, bread);
    if (!buff)
        return CL_TYPE_ERROR;

    /* The copy in `buffer` is not read again below; all detection works on
     * `buff`.  NOTE(review): the copy looks like a guarded read probe of the
     * mapping -- confirm against cli_memcpy() before removing it. */
    sret = cli_memcpy(buffer, buff, bread);
    if (sret) {
        cli_errmsg("cli_filetype2: fileread error!\n");
        return CL_TYPE_ERROR;
    }

    if (basetype == CL_TYPE_PART_ANY) { /* typing a partition */
        ret = cli_partitiontype(buff, bread, engine);
    } else { /* typing a file */
        ret = cli_filetype(buff, bread, engine);

        if (ret == CL_TYPE_BINARY_DATA) {
            switch (is_tar(buff, bread)) {
                case 1:
                    cli_dbgmsg("Recognized old fashioned tar file\n");
                    return CL_TYPE_OLD_TAR;
                case 2:
                    cli_dbgmsg("Recognized POSIX tar file\n");
                    return CL_TYPE_POSIX_TAR;
                default:
                    break; /* not a tar archive; keep CL_TYPE_BINARY_DATA */
            }
        } else if (ret == CL_TYPE_ZIP && bread > 2 * (SIZEOF_LH + 5)) {
            const char lhdr_magic[4] = {0x50, 0x4b, 0x03, 0x04};
            const unsigned char *zbuff  = buff; /* start of the current window */
            uint32_t zread              = bread; /* size of the current window */
            uint64_t zoff               = bread; /* file offset just past the window */
            const unsigned char *znamep = buff; /* search cursor / filename pointer */
            int32_t zlen                = bread; /* bytes left from znamep */
            int lhc                     = 0;     /* local headers examined so far */
            int zi;

            /* At most 32 search/remap rounds. */
            for (zi = 0; zi < 32; zi++) {
                znamep = cli_memstr(znamep, zlen, lhdr_magic, 4);
                if (NULL != znamep) {
                    /* Skip the fixed part of the local header to reach the
                     * filename. */
                    znamep += SIZEOF_LH;
                    zlen = zread - (znamep - zbuff);
                    if (zlen > 4) { /* Ensure we've mapped for OOXML filename compare */
                        if (0 == memcmp(znamep, "xl/", 3)) {
                            cli_dbgmsg("Recognized OOXML XL file\n");
                            return CL_TYPE_OOXML_XL;
                        } else if (0 == memcmp(znamep, "ppt/", 4)) {
                            cli_dbgmsg("Recognized OOXML PPT file\n");
                            return CL_TYPE_OOXML_PPT;
                        } else if (0 == memcmp(znamep, "word/", 5)) {
                            cli_dbgmsg("Recognized OOXML Word file\n");
                            return CL_TYPE_OOXML_WORD;
                        }
                        if (++lhc > 2)
                            break; /* only check first three zip headers */
                    } else {
                        znamep = NULL; /* force to map more */
                    }
                }

                if (znamep == NULL) {
                    if (map->len - zoff > SIZEOF_LH) {
                        zoff -= SIZEOF_LH + 5; /* remap for SIZEOF_LH+filelen for header overlap map boundary */
                        zread = MIN(MAGIC_BUFFER_SIZE, map->len - zoff);
                        zbuff = fmap_need_off_once(map, zoff, zread);
                        if (zbuff == NULL) {
                            cli_dbgmsg("cli_filetype2: error mapping data for OOXML check\n");
                            return CL_TYPE_ERROR;
                        }
                        zoff += zread;
                        znamep = zbuff;
                        zlen   = zread;
                    } else {
                        break; /* end of data */
                    }
                }
            }
        } else if (ret == CL_TYPE_MBR) {
            /* raw dmgs must be a multiple of 512 */
            if ((map->len % 512) == 0 && map->len > 512) {
                /* check if detected MBR is protective on GPT; only look at
                 * the second sector when those 8 bytes were actually mapped */
                if (bread >= 512 + 8 && 0 == memcmp(buff + 512, "EFI PART", 8)) {
                    cli_dbgmsg("Recognized GUID Partition Table file\n");
                    return CL_TYPE_GPT;
                }
                /* check if the MBR is a valid configuration */
                if (cli_mbr_check(buff, bread, map->len) == 0) {
                    return CL_TYPE_MBR;
                }
            }
            /* re-detect type */
            cli_dbgmsg("Recognized binary data\n");
            ret = CL_TYPE_BINARY_DATA;
        }
    }

    if (ret >= CL_TYPE_TEXT_ASCII && ret <= CL_TYPE_BINARY_DATA) {
        /* HTML files may contain special characters and could be
         * misidentified as BINARY_DATA by cli_filetype()
         */
        root = engine->root[0];
        if (!root)
            return ret;

        if (cli_ac_initdata(&mdata, root->ac_partsigs, root->ac_lsigs, root->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN))
            return ret;

        sret = cli_ac_scanbuff(buff, bread, NULL, NULL, NULL, root, &mdata, 0, ret, NULL, AC_SCAN_FT, NULL);

        cli_ac_freedata(&mdata);

        if (sret >= CL_TYPENO) {
            ret = sret;
        } else {
            unsigned char *decoded;

            if (cli_ac_initdata(&mdata, root->ac_partsigs, root->ac_lsigs, root->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN))
                return ret;

            /* Second attempt: treat the bytes as UTF-16 and scan the ASCII
             * conversion, which is half as long. */
            decoded = (unsigned char *)cli_utf16toascii((char *)buff, bread);
            if (decoded) {
                sret = cli_ac_scanbuff(decoded, bread / 2, NULL, NULL, NULL, root, &mdata, 0, CL_TYPE_TEXT_ASCII, NULL, AC_SCAN_FT, NULL);
                free(decoded);
                if (sret == CL_TYPE_HTML)
                    ret = CL_TYPE_HTML_UTF16;
            }

            cli_ac_freedata(&mdata);

            if ((((struct cli_dconf *)engine->dconf)->phishing & PHISHING_CONF_ENTCONV) && ret != CL_TYPE_HTML_UTF16) {
                const char *encoding;

                /* check if we can autodetect this encoding.
                 * If we can't, don't try to detect HTML sig, since
                 * we just tried that above, and failed */
                if ((encoding = encoding_detect_bom(buff, bread))) {
                    unsigned char decodedbuff[(MAGIC_BUFFER_SIZE + 1) * 2];
                    m_area_t in_area, out_area;

                    memset(decodedbuff, 0, sizeof(decodedbuff));

                    in_area.buffer  = (unsigned char *)buff;
                    in_area.length  = bread;
                    in_area.offset  = 0;
                    out_area.buffer = decodedbuff;
                    out_area.length = sizeof(decodedbuff);
                    out_area.offset = 0;

                    /* in htmlnorm we simply skip over \0 chars, and that allows to parse HTML in any unicode
                     * (multibyte characters will not be exactly handled, but that is not a problem).
                     * However when detecting whether a file is HTML or not, we need exact conversion.
                     * (just eliminating zeros and matching would introduce false positives) */
                    if (encoding_normalize_toascii(&in_area, encoding, &out_area) >= 0 && out_area.length > 0) {
                        if (cli_ac_initdata(&mdata, root->ac_partsigs, root->ac_lsigs, root->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN))
                            return ret;

                        sret = cli_ac_scanbuff(decodedbuff, out_area.length, NULL, NULL, NULL, root, &mdata, 0, 0, NULL, AC_SCAN_FT, NULL); /* FIXME: can we use CL_TYPE_TEXT_ASCII instead of 0? */
                        if (sret == CL_TYPE_HTML) {
                            cli_dbgmsg("cli_filetype2: detected HTML signature in Unicode file\n");
                            /* htmlnorm is able to handle any unicode now, since it skips null chars */
                            ret = CL_TYPE_HTML;
                        }

                        cli_ac_freedata(&mdata);
                    }
                }
            }
        }
    }

    return ret;
}