2005-04-27 12:45:17 +00:00
/*
2019-01-25 10:15:50 -05:00
* Copyright ( C ) 2013 - 2019 Cisco Systems , Inc . and / or its affiliates . All rights reserved .
* Copyright ( C ) 2007 - 2013 Sourcefire , Inc .
2008-04-02 15:24:51 +00:00
*
2010-05-11 11:34:19 +03:00
* Authors : Nigel Horne , Török Edvin
*
* Also based on Matt Olney ' s pdf parser in snort - nrt .
2005-04-27 12:45:17 +00:00
*
* This program is free software ; you can redistribute it and / or modify
2008-04-02 15:24:51 +00:00
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation .
2005-04-27 12:45:17 +00:00
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
* along with this program ; if not , write to the Free Software
2008-04-02 15:24:51 +00:00
* Foundation , Inc . , 51 Franklin Street , Fifth Floor , Boston ,
* MA 02110 - 1301 , USA .
2007-03-01 11:06:37 +00:00
*
* TODO : Embedded fonts
* TODO : Predictor image handling
2005-04-27 12:45:17 +00:00
*/
# if HAVE_CONFIG_H
# include "clamav-config.h"
# endif
2005-04-30 13:12:28 +00:00
# include <stdio.h>
# include <sys/types.h>
# include <sys/stat.h>
# include <ctype.h>
# include <string.h>
# include <fcntl.h>
# include <stdlib.h>
2005-05-01 11:47:49 +00:00
# include <errno.h>
2018-12-03 12:40:13 -05:00
# ifdef HAVE_LIMITS_H
2007-02-25 00:43:49 +00:00
# include <limits.h>
# endif
2018-12-03 12:40:13 -05:00
# ifdef HAVE_UNISTD_H
2007-06-18 18:57:57 +00:00
# include <unistd.h>
# endif
2005-04-30 13:12:28 +00:00
# include <zlib.h>
2014-05-27 16:33:51 -04:00
# if HAVE_ICONV
2014-05-23 14:06:35 -04:00
# include <iconv.h>
2014-05-27 16:33:51 -04:00
# endif
2014-05-23 14:06:35 -04:00
2017-03-31 12:11:31 -04:00
# ifdef _WIN32
# include <stdint.h>
# endif
2007-02-25 00:43:49 +00:00
# include "clamav.h"
# include "others.h"
2005-12-09 07:39:17 +00:00
# include "pdf.h"
2016-03-31 16:32:26 -04:00
# include "pdfdecode.h"
2008-02-12 11:33:47 +00:00
# include "scanners.h"
2009-08-25 01:21:15 +02:00
# include "fmap.h"
2009-08-24 22:09:38 +02:00
# include "str.h"
2010-08-02 15:42:58 +03:00
# include "bytecode.h"
# include "bytecode_api.h"
2011-05-07 18:06:06 +03:00
# include "arc4.h"
2011-12-14 15:43:14 +02:00
# include "rijndael.h"
2011-12-23 17:40:22 +02:00
# include "textnorm.h"
2015-04-14 15:53:17 -04:00
# include "conv.h"
2014-04-29 17:27:02 -04:00
# include "json_api.h"
2011-12-28 19:05:57 +02:00
2018-12-03 12:40:13 -05:00
# ifdef CL_DEBUG
2019-03-05 21:15:41 -05:00
/*#define SAVE_TMP
2008-08-21 20:21:43 +00:00
* Save the file being worked on in tmp */
2007-03-01 11:06:37 +00:00
# endif
2014-06-03 09:46:13 -04:00
/*
 * Forward declarations.
 *
 * struct pdf_struct is only declared (not defined) here so that the
 * prototypes in this file can take pointers to it.
 *
 * The four static prototypes below are file-local helpers; their definitions
 * are not part of this section of the file.
 * NOTE(review): judging by the names only, the first two decode ASCIIHex /
 * ASCII85 data from buf (len bytes) into output, and the last two advance
 * within a buffer of len bytes - confirm against the definitions.
 */
struct pdf_struct ;
2018-12-03 12:40:13 -05:00
static int asciihexdecode ( const char * buf , off_t len , char * output ) ;
static int ascii85decode ( const char * buf , off_t len , unsigned char * output ) ;
static const char * pdf_nextlinestart ( const char * ptr , size_t len ) ;
static const char * pdf_nextobject ( const char * ptr , size_t len ) ;
2005-05-21 22:07:14 +00:00
2014-04-29 17:27:02 -04:00
/* PDF statistics callbacks and related */
/*
 * struct pdfname_action is forward declared so the callback prototypes can
 * take a pointer to it.
 *
 * Everything between "# if HAVE_JSON" and the matching "# endif" is only
 * declared when HAVE_JSON is non-zero:
 *   - pdf_export_json() takes just the pdf_struct.
 *   - every *_cb function shares one signature:
 *     (struct pdf_struct *, struct pdf_obj *, struct pdfname_action *).
 * NOTE(review): each callback is presumably tied to the PDF name it is named
 * after (e.g. /FlateDecode, /JavaScript) - confirm against the table that
 * registers them.
 */
2014-06-25 14:06:17 -04:00
struct pdfname_action ;
2014-04-16 14:23:16 -04:00
2014-07-08 19:53:41 -04:00
# if HAVE_JSON
2014-04-29 17:27:02 -04:00
static void pdf_export_json ( struct pdf_struct * ) ;
2014-06-25 14:06:17 -04:00
static void ASCIIHexDecode_cb ( struct pdf_struct * , struct pdf_obj * , struct pdfname_action * ) ;
static void ASCII85Decode_cb ( struct pdf_struct * , struct pdf_obj * , struct pdfname_action * ) ;
static void EmbeddedFile_cb ( struct pdf_struct * , struct pdf_obj * , struct pdfname_action * ) ;
static void FlateDecode_cb ( struct pdf_struct * , struct pdf_obj * , struct pdfname_action * ) ;
static void Image_cb ( struct pdf_struct * , struct pdf_obj * , struct pdfname_action * ) ;
static void LZWDecode_cb ( struct pdf_struct * , struct pdf_obj * , struct pdfname_action * ) ;
static void RunLengthDecode_cb ( struct pdf_struct * , struct pdf_obj * , struct pdfname_action * ) ;
static void CCITTFaxDecode_cb ( struct pdf_struct * , struct pdf_obj * , struct pdfname_action * ) ;
static void JBIG2Decode_cb ( struct pdf_struct * , struct pdf_obj * , struct pdfname_action * ) ;
static void DCTDecode_cb ( struct pdf_struct * , struct pdf_obj * , struct pdfname_action * ) ;
static void JPXDecode_cb ( struct pdf_struct * , struct pdf_obj * , struct pdfname_action * ) ;
static void Crypt_cb ( struct pdf_struct * , struct pdf_obj * , struct pdfname_action * ) ;
static void Standard_cb ( struct pdf_struct * , struct pdf_obj * , struct pdfname_action * ) ;
static void Sig_cb ( struct pdf_struct * , struct pdf_obj * , struct pdfname_action * ) ;
static void JavaScript_cb ( struct pdf_struct * , struct pdf_obj * , struct pdfname_action * ) ;
static void OpenAction_cb ( struct pdf_struct * , struct pdf_obj * , struct pdfname_action * ) ;
static void Launch_cb ( struct pdf_struct * , struct pdf_obj * , struct pdfname_action * ) ;
static void Page_cb ( struct pdf_struct * , struct pdf_obj * , struct pdfname_action * ) ;
static void Author_cb ( struct pdf_struct * , struct pdf_obj * , struct pdfname_action * ) ;
static void Creator_cb ( struct pdf_struct * , struct pdf_obj * , struct pdfname_action * ) ;
static void Producer_cb ( struct pdf_struct * , struct pdf_obj * , struct pdfname_action * ) ;
static void CreationDate_cb ( struct pdf_struct * , struct pdf_obj * , struct pdfname_action * ) ;
static void ModificationDate_cb ( struct pdf_struct * , struct pdf_obj * , struct pdfname_action * ) ;
static void Title_cb ( struct pdf_struct * , struct pdf_obj * , struct pdfname_action * ) ;
static void Subject_cb ( struct pdf_struct * , struct pdf_obj * , struct pdfname_action * ) ;
static void Keywords_cb ( struct pdf_struct * , struct pdf_obj * , struct pdfname_action * ) ;
static void Pages_cb ( struct pdf_struct * , struct pdf_obj * , struct pdfname_action * ) ;
static void Colors_cb ( struct pdf_struct * pdf , struct pdf_obj * obj , struct pdfname_action * act ) ;
2014-06-27 12:43:23 -04:00
static void RichMedia_cb ( struct pdf_struct * pdf , struct pdf_obj * obj , struct pdfname_action * act ) ;
static void AcroForm_cb ( struct pdf_struct * pdf , struct pdf_obj * obj , struct pdfname_action * act ) ;
static void XFA_cb ( struct pdf_struct * pdf , struct pdf_obj * obj , struct pdfname_action * act ) ;
2014-07-08 19:53:41 -04:00
# endif
2014-04-29 17:27:02 -04:00
/* End PDF statistics callbacks and related */
2014-04-16 14:23:16 -04:00
2018-08-14 14:00:31 -07:00
/*
 * Prototypes for file-local helpers that look up `key` inside a buffer
 * (q / q0) of `len` bytes.
 * NOTE(review): going by the names and return types, these read an integer,
 * locate a dictionary (updating *len), read a value, and read a string
 * (reporting its length through slen and its end through qend) - confirm
 * against the definitions, which are not part of this section of the file.
 */
static int pdf_readint ( const char * q0 , int len , const char * key ) ;
2018-12-03 12:40:13 -05:00
static const char * pdf_getdict ( const char * q0 , int * len , const char * key ) ;
2018-08-14 14:00:31 -07:00
static char * pdf_readval ( const char * q , int len , const char * key ) ;
static char * pdf_readstring ( const char * q0 , int len , const char * key , unsigned * slen , const char * * qend , int noescape ) ;
2010-05-10 11:57:44 +03:00
/**
 * @brief Check whether a cross-reference section can be found in a buffer.
 *
 * Leading spaces, line feeds and carriage returns are skipped first. The
 * check then succeeds if the buffer continues with the keyword "xref", or if
 * the name "/XRef" (as used by cross-reference streams) occurs anywhere in
 * the remainder of the buffer.
 *
 * @param xref  First byte to inspect.
 * @param eof   One past the last byte that may be inspected.
 *
 * @return 0 if "xref" or "/XRef" was found.
 * @return -1 otherwise, including when fewer than five bytes remain after
 *         the leading whitespace.
 */
static int xrefCheck(const char *xref, const char *eof)
{
    const size_t table_kw_len  = strlen("xref");
    const size_t stream_kw_len = strlen("/XRef");
    const char *cursor;

    /* Step over blanks and line breaks in front of the keyword. */
    for (; xref < eof; xref++) {
        if (*xref != ' ' && *xref != '\n' && *xref != '\r')
            break;
    }

    /* Not enough data left to hold the keyword. */
    if (xref + 4 >= eof)
        return -1;

    /* Classic cross-reference table. */
    if (0 == memcmp(xref, "xref", table_kw_len)) {
        cli_dbgmsg("cli_pdf: found xref\n");
        return 0;
    }

    /* Otherwise it could be a cross-reference stream: scan for its name. */
    cursor = xref;
    while (cursor + 5 < eof) {
        if (0 == memcmp(cursor, "/XRef", stream_kw_len)) {
            cli_dbgmsg("cli_pdf: found /XRef\n");
            return 0;
        }
        cursor++;
    }

    return -1;
}
2011-12-28 19:05:57 +02:00
/*
 * Define NOISY to be noisy about things that we can't parse properly.
 *
 * NOISY is explicitly #undef'ed right before it is tested, so as written the
 * "# else" branch is always selected and both macros expand to nothing.
 * To enable the messages, replace the #undef with a #define.
 *
 * When enabled:
 *   noisy_msg(pdf, ...)   forwards to cli_infomsg(pdf->ctx, ...)
 *   noisy_warnmsg(...)    forwards to cli_warnmsg(...)
 */
2017-08-24 16:33:33 -04:00
# undef NOISY
2011-12-28 19:05:57 +02:00
# ifdef NOISY
# define noisy_msg(pdf, ...) cli_infomsg(pdf->ctx, __VA_ARGS__)
2017-08-16 17:31:45 -04:00
# define noisy_warnmsg(...) cli_warnmsg(__VA_ARGS__)
2011-12-28 19:05:57 +02:00
# else
2017-08-16 17:31:45 -04:00
# define noisy_msg(pdf, ...)
# define noisy_warnmsg(...)
2011-12-28 19:05:57 +02:00
# endif
2018-08-14 14:00:31 -07:00
/**
 * @brief Walk BACKwards from q until a non-whitespace byte is reached.
 *
 * The bytes treated as whitespace are NUL (0x00), tab (0x09), line feed
 * (0x0a), form feed (0x0c), carriage return (0x0d) and space (0x20).
 *
 * @param q      Position to start from (the far end of the search space).
 * @param start  Beginning of the search space; the walk never goes below it
 *               and the byte at start itself is never examined.
 *
 * @return const char*  Address of the first non-whitespace byte met while
 *                      walking backwards, or start if none was met. If q is
 *                      not above start, q is returned unchanged.
 */
static const char *findNextNonWSBack(const char *q, const char *start)
{
    for (; q > start; q--) {
        switch (*q) {
            case 0x00:
            case 0x09:
            case 0x0a:
            case 0x0c:
            case 0x0d:
            case 0x20:
                /* whitespace: keep walking backwards */
                continue;
            default:
                return q;
        }
    }

    return q;
}
2018-08-14 14:00:31 -07:00
/**
 * @brief Walk FORwards from q until a non-whitespace byte is reached.
 *
 * The bytes treated as whitespace are NUL (0x00), tab (0x09), line feed
 * (0x0a), form feed (0x0c), carriage return (0x0d) and space (0x20).
 *
 * @param q    Position to start from (the beginning of the search space).
 * @param end  End of the search space; the byte at end is never examined.
 *
 * @return const char*  Address of the first non-whitespace byte at or after
 *                      q, or end if only whitespace was found. If q is not
 *                      below end, q is returned unchanged.
 */
static const char *findNextNonWS(const char *q, const char *end)
{
    for (; q < end; q++) {
        switch (*q) {
            case 0x00:
            case 0x09:
            case 0x0a:
            case 0x0c:
            case 0x0d:
            case 0x20:
                /* whitespace: keep walking forwards */
                continue;
            default:
                return q;
        }
    }

    return q;
}
/**
* @ brief Find bounds of stream .
2019-03-05 21:15:41 -05:00
*
2018-08-14 14:00:31 -07:00
* PDF streams are prefixed with " stream " and suffixed with " endstream " .
* Return value indicates success or failure .
2019-03-05 21:15:41 -05:00
*
2018-08-14 14:00:31 -07:00
* @ param start start address of search space .
2019-03-05 21:15:41 -05:00
* @ param size size of search space
2018-08-14 14:00:31 -07:00
* @ param [ out ] stream output param , address of start of stream data
2019-03-05 21:15:41 -05:00
* @ param [ out ] stream_size output param , size of stream data
2018-08-14 14:00:31 -07:00
* @ param newline_hack hack to support newlines that are \ r \ n , and not just \ n or just \ r .
2019-03-05 21:15:41 -05:00
*
* @ return cl_error_t CL_SUCCESS if stream bounds were found .
* @ return cl_error_t CL_BREAK if stream bounds could not be found .
* @ return cl_error_t CL_EFORMAT if stream start was found , but not end . ( truncated )
* @ return cl_error_t CL_EARG if invalid args were provided .
2018-08-14 14:00:31 -07:00
*/
2019-03-05 21:15:41 -05:00
static cl_error_t find_stream_bounds (
2018-12-03 12:40:13 -05:00
const char * start ,
2019-03-05 21:15:41 -05:00
size_t size ,
const char * * stream ,
size_t * stream_size ,
2018-08-14 14:00:31 -07:00
int newline_hack )
2010-05-11 10:37:10 +03:00
{
2019-03-05 21:15:41 -05:00
cl_error_t status = CL_BREAK ;
const char * idx ;
const char * stream_begin ;
const char * endstream_begin ;
size_t bytesleft = size ;
if ( ( NULL = = start ) | | ( 0 = = bytesleft ) | | ( NULL = = stream ) | | ( NULL = = stream_size ) ) {
status = CL_EARG ;
return status ;
}
* stream = NULL ;
* stream_size = 0 ;
2018-08-14 14:00:31 -07:00
/* Begin by finding the "stream" string that prefixes stream data. */
2019-03-05 21:15:41 -05:00
if ( ( stream_begin = cli_memstr ( start , bytesleft , " stream " , strlen ( " stream " ) ) ) ) {
idx = stream_begin + strlen ( " stream " ) ;
bytesleft - = idx - start ;
2014-04-07 16:39:54 -04:00
if ( bytesleft < 0 )
2019-03-05 21:15:41 -05:00
goto done ;
2014-04-07 16:39:54 -04:00
2018-08-14 14:00:31 -07:00
/* Skip any new line charcters. */
2019-03-05 21:15:41 -05:00
if ( bytesleft > = 2 & & idx [ 0 ] = = ' \xd ' & & idx [ 1 ] = = ' \xa ' ) {
idx + = 2 ;
if ( newline_hack & & ( bytesleft > 2 ) & & idx [ 0 ] = = ' \xa ' )
idx + + ;
} else if ( bytesleft & & idx [ 0 ] = = ' \xa ' ) {
idx + + ;
2014-04-07 16:39:54 -04:00
}
2019-03-05 21:15:41 -05:00
/* Pass back start of the stream data. */
* stream = idx ;
2018-08-14 14:00:31 -07:00
2019-03-05 21:15:41 -05:00
bytesleft = size - ( idx - start ) ;
if ( bytesleft < = 0 )
goto done ;
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
/* Now find the "endstream" string that suffixes stream data. */
endstream_begin = cli_memstr ( idx , bytesleft , " endstream " , strlen ( " endstream " ) ) ;
if ( ! endstream_begin ) {
/* Couldn't find "endstream", but that's ok --
* - - we ' ll just count the rest of the provided buffer . */
cli_dbgmsg ( " find_stream_bounds: Truncated stream found! \n " ) ;
endstream_begin = start + size ;
status = CL_EFORMAT ;
2018-08-14 14:00:31 -07:00
}
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
/* Pass back end of the stream data, as offset from start. */
* stream_size = endstream_begin - * stream ;
2018-08-14 14:00:31 -07:00
2019-03-05 21:15:41 -05:00
if ( CL_EFORMAT ! = status )
status = CL_SUCCESS ;
2010-05-11 10:37:10 +03:00
}
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
done :
return status ;
2010-05-11 10:37:10 +03:00
}
2018-06-09 09:42:57 -04:00
/**
2019-03-05 21:15:41 -05:00
* @ brief Find the next * indirect * object in an object stream , adds it to our list of
2018-08-14 14:00:31 -07:00
* objects , and increments nobj .
2019-03-05 21:15:41 -05:00
*
2018-08-14 14:00:31 -07:00
* Indirect objects in a stream DON ' T begin with " obj " and end with " endobj " .
* Instead , they have an obj ID and an offset from the first object to point you
* right at them .
2019-03-05 21:15:41 -05:00
*
2018-08-14 14:00:31 -07:00
* If found , objstm - > current will be updated to the next obj id .
2019-03-05 21:15:41 -05:00
*
* All objects in an object stream are indirect and thus do not begin or start
* with " obj " or " endobj " . Instead , the object stream takes the following
2018-08-14 14:00:31 -07:00
* format .
2019-03-05 21:15:41 -05:00
*
2018-08-14 14:00:31 -07:00
* < dictionary describing stream > objstm content endobjstm
2019-03-05 21:15:41 -05:00
*
2018-08-14 14:00:31 -07:00
* where content looks something like the following :
2019-03-05 21:15:41 -05:00
*
2018-08-14 14:00:31 -07:00
* 15 0 16 3 17 46 ( ab ) < < / IDS 8 0 R / JavaScript 27 0 R / URLS 9 0 R > > < < / Names [ ( Test ) 28 0 R ] > >
2019-03-05 21:15:41 -05:00
*
* In the above example , the literal string ( ab ) is indirect object # 15 , and
* begins at offset 0 of the set of objects . The next object , # 16 begis at
* offset 3 is a dictionary . The final object is also a dictionary , beginning
2018-08-14 14:00:31 -07:00
* at offset 46.
2019-03-05 21:15:41 -05:00
*
* @ param pdf Pdf struct that keeps track of all information found in the PDF .
2018-08-14 14:00:31 -07:00
* @ param objstm
2019-03-05 21:15:41 -05:00
*
2018-08-14 14:00:31 -07:00
* @ return CL_SUCCESS if success
* @ return CL_EPARSE if parsing error
* @ return CL_EMEM if error allocating memory
* @ return CL_EARG if invalid arguments
*/
int pdf_findobj_in_objstm ( struct pdf_struct * pdf , struct objstm_struct * objstm , struct pdf_obj * * obj_found )
{
2018-12-03 12:40:13 -05:00
cl_error_t status = CL_EPARSE ;
2018-08-14 14:00:31 -07:00
struct pdf_obj * obj = NULL ;
2019-03-05 21:15:41 -05:00
unsigned long objid = 0 , objoff = 0 ;
2019-01-22 14:15:46 -05:00
long temp_long = 0 ;
2018-12-03 12:40:13 -05:00
const char * index = NULL ;
2018-08-14 14:00:31 -07:00
size_t bytes_remaining = 0 ;
if ( NULL = = pdf | | NULL = = objstm ) {
cli_warnmsg ( " pdf_findobj_in_objstm: invalid arguments \n " ) ;
return CL_EARG ;
}
* obj_found = NULL ;
2018-12-03 12:40:13 -05:00
index = objstm - > streambuf + objstm - > current_pair ;
2018-08-14 14:00:31 -07:00
bytes_remaining = objstm - > streambuf_len - objstm - > current_pair ;
obj = calloc ( sizeof ( struct pdf_obj ) , 1 ) ;
if ( ! obj ) {
cli_warnmsg ( " pdf_findobj_in_objstm: out of memory finding objects in stream \n " ) ;
status = CL_EMEM ;
goto done ;
}
/* This object is in a stream, not in the regular map buffer. */
obj - > objstm = objstm ;
/* objstm->current_pair points directly to the obj id */
2019-01-22 14:15:46 -05:00
if ( CL_SUCCESS ! = cli_strntol_wrap ( index , bytes_remaining , 0 , 10 , & temp_long ) ) {
2018-08-14 14:00:31 -07:00
/* Failed to find objid */
cli_dbgmsg ( " pdf_findobj_in_objstm: Failed to find objid for obj in object stream \n " ) ;
status = CL_EPARSE ;
goto done ;
2019-01-22 14:15:46 -05:00
} else if ( temp_long < 0 ) {
cli_dbgmsg ( " pdf_findobj_in_objstm: Encountered invalid negative objid (%ld). \n " , temp_long ) ;
status = CL_EPARSE ;
goto done ;
2018-08-14 14:00:31 -07:00
}
2019-01-22 14:15:46 -05:00
objid = ( unsigned long ) temp_long ;
2018-08-14 14:00:31 -07:00
/* Find the obj offset that appears just after the obj id*/
while ( ( index < objstm - > streambuf + objstm - > streambuf_len ) & & isdigit ( * index ) ) {
index + + ;
bytes_remaining - - ;
}
2018-12-03 12:40:13 -05:00
index = findNextNonWS ( index , objstm - > streambuf + objstm - > first ) ;
2018-08-14 14:00:31 -07:00
bytes_remaining = objstm - > streambuf + objstm - > streambuf_len - index ;
2019-01-22 14:15:46 -05:00
if ( CL_SUCCESS ! = cli_strntol_wrap ( index , bytes_remaining , 0 , 10 , & temp_long ) ) {
2018-08-14 14:00:31 -07:00
/* Failed to find obj offset */
cli_dbgmsg ( " pdf_findobj_in_objstm: Failed to find obj offset for obj in object stream \n " ) ;
status = CL_EPARSE ;
goto done ;
2019-01-22 14:15:46 -05:00
} else if ( temp_long < 0 ) {
cli_dbgmsg ( " pdf_findobj_in_objstm: Encountered invalid negative obj offset (%ld). \n " , temp_long ) ;
status = CL_EPARSE ;
goto done ;
2018-08-14 14:00:31 -07:00
}
2019-01-22 14:15:46 -05:00
objoff = ( unsigned long ) temp_long ;
2018-08-14 14:00:31 -07:00
2019-01-22 13:53:29 -05:00
if ( ( size_t ) objstm - > first + ( size_t ) objoff > objstm - > streambuf_len ) {
/* Alleged obj location is further than the length of the stream */
cli_dbgmsg ( " pdf_findobj_in_objstm: obj offset found is greater than the length of the stream. \n " ) ;
status = CL_EPARSE ;
goto done ;
}
2018-08-14 14:00:31 -07:00
objstm - > current = objstm - > first + objoff ;
2018-12-03 12:40:13 -05:00
obj - > id = ( objid < < 8 ) | ( 0 & 0xff ) ;
2018-08-14 14:00:31 -07:00
obj - > start = objstm - > current ;
obj - > flags = 0 ;
objstm - > nobjs_found + + ;
while ( ( index < objstm - > streambuf + objstm - > streambuf_len ) & & isdigit ( * index ) ) {
index + + ;
bytes_remaining - - ;
}
objstm - > current_pair = ( uint32_t ) ( findNextNonWS ( index , objstm - > streambuf + objstm - > first ) - objstm - > streambuf ) ;
/* Update current_pair, if there are more */
if ( ( objstm - > nobjs_found < objstm - > n ) & &
2018-12-03 12:40:13 -05:00
( index < objstm - > streambuf + objstm - > streambuf_len ) ) {
2018-08-14 14:00:31 -07:00
unsigned long next_objid = 0 , next_objoff = 0 ;
2019-03-05 21:15:41 -05:00
/*
* While we ' re at it ,
2018-08-14 14:00:31 -07:00
* lets record the size as running up to the next object offset .
2019-03-05 21:15:41 -05:00
*
2018-08-14 14:00:31 -07:00
* To do so , we will need to parse the next obj pair .
*/
/* objstm->current_pair points directly to the obj id */
2018-12-03 12:40:13 -05:00
index = objstm - > streambuf + objstm - > current_pair ;
2018-08-14 14:00:31 -07:00
bytes_remaining = objstm - > streambuf + objstm - > streambuf_len - index ;
2019-01-22 14:15:46 -05:00
if ( CL_SUCCESS ! = cli_strntol_wrap ( index , bytes_remaining , 0 , 10 , & temp_long ) ) {
2018-08-14 14:00:31 -07:00
/* Failed to find objid for next obj */
cli_dbgmsg ( " pdf_findobj_in_objstm: Failed to find next objid for obj in object stream though there should be {%u} more. \n " , objstm - > n - objstm - > nobjs_found ) ;
status = CL_EPARSE ;
goto done ;
2019-01-22 14:15:46 -05:00
} else if ( temp_long < 0 ) {
cli_dbgmsg ( " pdf_findobj_in_objstm: Encountered invalid negative objid (%ld). \n " , temp_long ) ;
status = CL_EPARSE ;
goto done ;
2018-08-14 14:00:31 -07:00
}
2019-01-22 14:15:46 -05:00
next_objid = ( unsigned long ) temp_long ;
2018-08-14 14:00:31 -07:00
/* Find the obj offset that appears just after the obj id*/
while ( ( index < objstm - > streambuf + objstm - > streambuf_len ) & & isdigit ( * index ) ) {
index + + ;
bytes_remaining - - ;
}
2018-12-03 12:40:13 -05:00
index = findNextNonWS ( index , objstm - > streambuf + objstm - > first ) ;
2018-08-14 14:00:31 -07:00
bytes_remaining = objstm - > streambuf + objstm - > streambuf_len - index ;
2019-01-22 14:15:46 -05:00
if ( CL_SUCCESS ! = cli_strntol_wrap ( index , bytes_remaining , 0 , 10 , & temp_long ) ) {
2018-08-14 14:00:31 -07:00
/* Failed to find obj offset for next obj */
cli_dbgmsg ( " pdf_findobj_in_objstm: Failed to find next obj offset for obj in object stream though there should be {%u} more. \n " , objstm - > n - objstm - > nobjs_found ) ;
status = CL_EPARSE ;
goto done ;
2019-01-22 14:15:46 -05:00
} else if ( temp_long < 0 ) {
cli_dbgmsg ( " pdf_findobj_in_objstm: Encountered invalid negative obj offset (%ld). \n " , temp_long ) ;
status = CL_EPARSE ;
goto done ;
}
next_objoff = ( unsigned long ) temp_long ;
if ( next_objoff < = objoff ) {
2018-10-01 19:46:23 -04:00
/* Failed to find obj offset for next obj */
cli_dbgmsg ( " pdf_findobj_in_objstm: Found next obj offset for obj in object stream but it's less than or equal to the current one! \n " ) ;
status = CL_EPARSE ;
goto done ;
2018-12-03 12:40:13 -05:00
} else if ( objstm - > first + next_objoff > objstm - > streambuf_len ) {
2018-10-01 19:46:23 -04:00
/* Failed to find obj offset for next obj */
cli_dbgmsg ( " pdf_findobj_in_objstm: Found next obj offset for obj in object stream but it's further out than the size of the stream! \n " ) ;
status = CL_EPARSE ;
goto done ;
}
2018-08-14 14:00:31 -07:00
2018-10-01 19:46:23 -04:00
obj - > size = next_objoff - objoff ;
2018-12-03 12:40:13 -05:00
} else {
2018-08-14 14:00:31 -07:00
/*
* Should be no more objects . We should verify .
2019-03-05 21:15:41 -05:00
*
2018-08-14 14:00:31 -07:00
* Either way . . .
2019-03-05 21:15:41 -05:00
* obj - > size should be the rest of the buffer .
2018-08-14 14:00:31 -07:00
*/
if ( objstm - > nobjs_found < objstm - > n ) {
cli_warnmsg ( " pdf_findobj_in_objstm: Fewer objects found in object stream than expected! \n " ) ;
}
obj - > size = objstm - > streambuf_len - obj - > start ;
}
/* Success! Add the object to the list of all objects found. */
pdf - > nobjs + + ;
2018-12-03 12:40:13 -05:00
pdf - > objs = cli_realloc2 ( pdf - > objs , sizeof ( struct pdf_obj * ) * pdf - > nobjs ) ;
2018-08-14 14:00:31 -07:00
if ( ! pdf - > objs ) {
cli_warnmsg ( " pdf_findobj_in_objstm: out of memory finding objects in stream \n " ) ;
status = CL_EMEM ;
goto done ;
}
2018-12-03 12:40:13 -05:00
pdf - > objs [ pdf - > nobjs - 1 ] = obj ;
2018-08-14 14:00:31 -07:00
* obj_found = obj ;
status = CL_SUCCESS ;
done :
if ( CL_SUCCESS ! = status ) {
if ( NULL ! = obj ) {
free ( obj ) ;
}
}
return status ;
}
/**
* @ brief Find the next * indirect * object .
2019-03-05 21:15:41 -05:00
*
* Indirect objects located outside of an object stream are prefaced with :
* < objid > < genid > obj
*
* Each of the above are separated by whitespace of some sort .
*
* Indirect objects are postfaced with :
* endobj
*
* The specification does not say if whitespace is required before or after " endobj " .
*
* Identify truncated objects .
*
2018-08-14 14:00:31 -07:00
* If found , pdf - > offset will be updated to just after the " endobj " .
* If truncated , pdf - > offset will = = pdf - > size .
* If not found , pdf - > offset will not be updated .
2019-03-05 21:15:41 -05:00
*
* @ param pdf Pdf context struct that keeps track of all information found in the PDF .
*
2018-08-14 14:00:31 -07:00
* @ return CL_SUCCESS if success
* @ return CL_BREAK if no more objects
* @ return CL_EPARSE if parsing error
* @ return CL_EMEM if error allocating memory
2018-06-09 09:42:57 -04:00
*/
2018-08-14 14:00:31 -07:00
cl_error_t pdf_findobj ( struct pdf_struct * pdf )
2010-05-10 11:57:44 +03:00
{
2018-08-14 14:00:31 -07:00
cl_error_t status = CL_EPARSE ;
2019-03-05 21:15:41 -05:00
const char * start , * idx , * genid_search_index , * objid_search_index ;
const char * obj_begin = NULL , * obj_end = NULL ;
const char * endobj_begin = NULL , * endobj_end = NULL ;
2018-08-14 14:00:31 -07:00
struct pdf_obj * obj = NULL ;
2019-03-05 21:15:41 -05:00
size_t bytesleft ;
2018-06-02 20:58:35 -04:00
unsigned long genid , objid ;
2019-01-22 14:15:46 -05:00
long temp_long ;
2010-05-10 11:57:44 +03:00
pdf - > nobjs + + ;
2018-12-03 12:40:13 -05:00
pdf - > objs = cli_realloc2 ( pdf - > objs , sizeof ( struct pdf_obj * ) * pdf - > nobjs ) ;
2010-05-10 11:57:44 +03:00
if ( ! pdf - > objs ) {
2018-08-14 14:00:31 -07:00
status = CL_EMEM ;
goto done ;
}
obj = malloc ( sizeof ( struct pdf_obj ) ) ;
if ( ! obj ) {
status = CL_EMEM ;
goto done ;
2010-05-10 11:57:44 +03:00
}
2018-12-03 12:40:13 -05:00
pdf - > objs [ pdf - > nobjs - 1 ] = obj ;
2014-04-07 16:39:54 -04:00
2010-05-11 11:26:35 +03:00
memset ( obj , 0 , sizeof ( * obj ) ) ;
2018-08-14 14:00:31 -07:00
2018-12-03 12:40:13 -05:00
start = pdf - > map + pdf - > offset ;
2010-05-10 11:57:44 +03:00
bytesleft = pdf - > size - pdf - > offset ;
2018-08-14 14:00:31 -07:00
2019-03-05 21:15:41 -05:00
/*
* Start by searching for " obj "
*/
idx = start + 1 ;
while ( bytesleft > 1 + strlen ( " obj " ) ) {
/* `- 1` accounts for size of white space before obj */
idx = cli_memstr ( idx , bytesleft - 1 , " obj " , strlen ( " obj " ) ) ;
if ( NULL = = idx ) {
status = CL_BREAK ;
goto done ; /* No more objs. */
2018-08-14 14:00:31 -07:00
}
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
/* verify that the word has a whitespace before it, and is not the end of
* a previous word */
idx - - ;
bytesleft = ( pdf - > size - pdf - > offset ) - ( size_t ) ( idx - start ) ;
2018-08-14 14:00:31 -07:00
2019-03-05 21:15:41 -05:00
if ( * idx ! = 0 & & * idx ! = 9 & & * idx ! = 0xa & & * idx ! = 0xc & & * idx ! = 0xd & & * idx ! = 0x20 ) {
/* This instance of "obj" appears to be part of a longer string.
2018-08-14 14:00:31 -07:00
* Skip it , and keep searching for an object . */
2019-03-05 21:15:41 -05:00
idx + = 1 + strlen ( " obj " ) ;
bytesleft - = 1 + strlen ( " obj " ) ;
2014-04-07 16:39:54 -04:00
continue ;
}
2019-03-05 21:15:41 -05:00
/* Found the beginning of the word */
obj_begin = idx ;
obj_end = idx + 1 + strlen ( " obj " ) ;
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
break ;
2018-08-14 14:00:31 -07:00
}
2019-03-05 21:15:41 -05:00
if ( ( NULL = = obj_begin ) | | ( NULL = = obj_end ) ) {
status = CL_BREAK ;
goto done ; /* No more objs. */
}
2010-08-31 10:53:29 +03:00
2018-08-14 14:00:31 -07:00
/* Find the generation id (genid) that appears before the "obj" */
2019-03-05 21:15:41 -05:00
genid_search_index = findNextNonWSBack ( obj_begin - 1 , start ) ;
while ( genid_search_index > start & & isdigit ( * genid_search_index ) )
genid_search_index - - ;
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
if ( CL_SUCCESS ! = cli_strntol_wrap ( genid_search_index , ( size_t ) ( ( obj_begin ) - genid_search_index ) , 0 , 10 , & temp_long ) ) {
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_findobj: Failed to parse object genid (# objects found: %u) \n " , pdf - > nobjs ) ;
2018-06-09 09:42:57 -04:00
/* Failed to parse, probably not a real object. Skip past the "obj" thing, and continue. */
2019-03-05 21:15:41 -05:00
pdf - > offset = obj_end - pdf - > map ;
2018-12-03 12:40:13 -05:00
status = CL_EPARSE ;
2018-08-14 14:00:31 -07:00
goto done ;
2019-01-22 14:15:46 -05:00
} else if ( temp_long < 0 ) {
cli_dbgmsg ( " pdf_findobj: Encountered invalid negative obj genid (%ld). \n " , temp_long ) ;
2019-03-05 21:15:41 -05:00
pdf - > offset = obj_end - pdf - > map ;
2019-01-22 14:15:46 -05:00
status = CL_EPARSE ;
goto done ;
2018-06-02 20:58:35 -04:00
}
2019-01-22 14:15:46 -05:00
genid = ( unsigned long ) temp_long ;
2018-08-14 14:00:31 -07:00
2019-03-05 21:15:41 -05:00
/* Find the object id (objid) that appears before the genid */
objid_search_index = findNextNonWSBack ( genid_search_index - 1 , start ) ;
while ( objid_search_index > start & & isdigit ( * objid_search_index ) )
objid_search_index - - ;
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
if ( CL_SUCCESS ! = cli_strntol_wrap ( objid_search_index , ( size_t ) ( ( genid_search_index ) - objid_search_index ) , 0 , 10 , & temp_long ) ) {
2018-06-02 20:58:35 -04:00
/*
2019-03-05 21:15:41 -05:00
* Edge case :
*
* PDFs with multiple revisions will have % % EOF before the end of the file ,
* followed by the next revision of the PDF , which will probably be an immediate objid .
*
* Example :
* % % EOF1 1 obj < blah > endobj
*
* If this is the case , we can detect it and continue parsing after the % % EOF .
2018-06-02 20:58:35 -04:00
*/
2019-03-05 21:15:41 -05:00
if ( objid_search_index - strlen ( " \ % \ %EO " ) > start ) {
const char * lastfile = objid_search_index - strlen ( " \ % \ %EO " ) ;
2018-06-02 20:58:35 -04:00
if ( 0 ! = strncmp ( lastfile , " \ % \ %EOF " , 5 ) ) {
2018-06-09 09:42:57 -04:00
/* Nope, wasn't %%EOF */
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_findobj: Failed to parse object objid (# objects found: %u) \n " , pdf - > nobjs ) ;
2018-06-09 09:42:57 -04:00
/* Skip past the "obj" thing, and continue. */
2019-03-05 21:15:41 -05:00
pdf - > offset = obj_end - pdf - > map ;
2018-12-03 12:40:13 -05:00
status = CL_EPARSE ;
2018-08-14 14:00:31 -07:00
goto done ;
2018-06-02 20:58:35 -04:00
}
2019-03-05 21:15:41 -05:00
/* Yup, Looks, like the file continues after %%EOF.
2018-06-02 20:58:35 -04:00
* Probably another revision . Keep parsing . . . */
2019-03-05 21:15:41 -05:00
objid_search_index + + ;
cli_dbgmsg ( " pdf_findobj: \ % \ %EOF detected before end of file, at offset: %zu \n " , ( size_t ) ( objid_search_index - pdf - > map ) ) ;
2018-06-02 20:58:35 -04:00
} else {
/* Failed parsing at the very beginning */
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_findobj: Failed to parse object objid (# objects found: %u) \n " , pdf - > nobjs ) ;
2018-06-09 09:42:57 -04:00
/* Probably not a real object. Skip past the "obj" thing, and continue. */
2019-03-05 21:15:41 -05:00
pdf - > offset = obj_end - pdf - > map ;
2018-12-03 12:40:13 -05:00
status = CL_EPARSE ;
2018-08-14 14:00:31 -07:00
goto done ;
2018-06-02 20:58:35 -04:00
}
/* Try again, with offset slightly adjusted */
2019-03-05 21:15:41 -05:00
if ( CL_SUCCESS ! = cli_strntol_wrap ( objid_search_index , ( size_t ) ( ( genid_search_index - 1 ) - objid_search_index ) , 0 , 10 , & temp_long ) ) {
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_findobj: Failed to parse object objid (# objects found: %u) \n " , pdf - > nobjs ) ;
2018-06-09 09:42:57 -04:00
/* Still failed... Probably not a real object. Skip past the "obj" thing, and continue. */
2019-03-05 21:15:41 -05:00
pdf - > offset = obj_end - pdf - > map ;
2018-12-03 12:40:13 -05:00
status = CL_EPARSE ;
2018-08-14 14:00:31 -07:00
goto done ;
2019-01-22 14:15:46 -05:00
} else if ( temp_long < 0 ) {
cli_dbgmsg ( " pdf_findobj: Encountered invalid negative objid (%ld). \n " , temp_long ) ;
2019-03-05 21:15:41 -05:00
pdf - > offset = obj_end - pdf - > map ;
2019-01-22 14:15:46 -05:00
status = CL_EPARSE ;
goto done ;
2018-06-02 20:58:35 -04:00
}
2019-01-22 14:15:46 -05:00
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_findobj: There appears to be an additional revision. Continuing to parse... \n " ) ;
2019-01-22 14:15:46 -05:00
} else if ( temp_long < 0 ) {
cli_dbgmsg ( " pdf_findobj: Encountered invalid negative objid (%ld). \n " , temp_long ) ;
2019-03-05 21:15:41 -05:00
pdf - > offset = obj_end - pdf - > map ;
2019-01-22 14:15:46 -05:00
status = CL_EPARSE ;
goto done ;
2018-06-02 20:58:35 -04:00
}
2019-01-22 14:15:46 -05:00
objid = ( unsigned long ) temp_long ;
2018-08-14 14:00:31 -07:00
2018-12-03 12:40:13 -05:00
obj - > id = ( objid < < 8 ) | ( genid & 0xff ) ;
2019-03-05 21:15:41 -05:00
obj - > start = obj_end - pdf - > map ; /* obj start begins just after the "obj" string */
2010-05-10 11:57:44 +03:00
obj - > flags = 0 ;
2018-08-14 14:00:31 -07:00
2019-03-05 21:15:41 -05:00
/*
* We now have the objid , genid , and object start .
* Find the object end ( " endobj " ) .
*/
/* `- 1` accounts for size of white space before obj */
endobj_begin = cli_memstr ( obj_end , pdf - > map + pdf - > size - obj_end , " endobj " , strlen ( " endobj " ) ) ;
if ( NULL = = endobj_begin ) {
/* No end to object.
* PDF appears to be malformed or truncated .
* Will record the object size as going to the end of the file .
* Will record that the object is truncated .
* Will position the pdf offset to the end of the PDF .
* The next iteration of this function will find no more objects . */
obj - > flags | = 1 < < OBJ_TRUNCATED ;
obj - > size = ( pdf - > map + pdf - > size ) - obj_end ;
pdf - > offset = pdf - > size ;
/* Truncated "object" found! */
status = CL_SUCCESS ;
goto done ;
2010-05-10 11:57:44 +03:00
}
2019-03-05 21:15:41 -05:00
endobj_end = endobj_begin + strlen ( " endobj " ) ;
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
/* Size of the object goes from "obj" <-> "endobject". */
obj - > size = endobj_begin - obj_end ;
pdf - > offset = endobj_end - pdf - > map ;
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
/*
* Object found !
*/
2018-08-14 14:00:31 -07:00
status = CL_SUCCESS ; /* truncated file, no end to obj. */
done :
if ( status = = CL_SUCCESS ) {
2019-03-05 21:15:41 -05:00
cli_dbgmsg ( " pdf_findobj: found %d %d obj @%lld, size: %zu bytes. \n " , obj - > id > > 8 , obj - > id & 0xff , ( long long ) ( obj - > start + pdf - > startoff ) , obj - > size ) ;
2018-12-03 12:40:13 -05:00
} else {
2018-08-14 14:00:31 -07:00
/* Remove the unused obj reference from our list of objects found */
/* No need to realloc pdf->objs back down. It won't leak. */
2018-12-03 12:40:13 -05:00
pdf - > objs [ pdf - > nobjs - 1 ] = NULL ;
2018-08-14 14:00:31 -07:00
pdf - > nobjs - - ;
/* Free up the obj struct. */
if ( NULL ! = obj )
free ( obj ) ;
2019-03-05 21:15:41 -05:00
if ( status = = CL_BREAK ) {
cli_dbgmsg ( " pdf_findobj: No more objects (# objects found: %u) \n " , pdf - > nobjs ) ;
} else if ( status = = CL_EMEM ) {
cli_warnmsg ( " pdf_findobj: Error allocating memory (# objects found: %u) \n " , pdf - > nobjs ) ;
} else {
cli_dbgmsg ( " pdf_findobj: Unexpected status code %d. \n " , status ) ;
}
2018-08-14 14:00:31 -07:00
}
2018-12-03 12:40:13 -05:00
return status ;
2010-05-10 11:57:44 +03:00
}
2017-08-16 17:31:45 -04:00
static size_t filter_writen ( struct pdf_struct * pdf , struct pdf_obj * obj , int fout , const char * buf , size_t len , size_t * sum )
2010-05-11 10:37:10 +03:00
{
2014-07-10 18:11:49 -04:00
UNUSEDPARAM ( obj ) ;
2017-08-16 17:31:45 -04:00
if ( cli_checklimits ( " pdf " , pdf - > ctx , ( unsigned long ) * sum , 0 , 0 ) ) /* TODO: May truncate for large values on 64-bit platforms */
2018-12-03 12:40:13 -05:00
return len ; /* pretend it was a successful write to suppress CL_EWRITE */
2014-04-07 16:39:54 -04:00
2010-05-11 10:37:10 +03:00
* sum + = len ;
2014-04-07 16:39:54 -04:00
2017-08-16 17:31:45 -04:00
return cli_writen ( fout , buf , ( unsigned int ) len ) ;
2010-05-11 10:37:10 +03:00
}
2016-03-31 12:29:16 -04:00
void pdfobj_flag ( struct pdf_struct * pdf , struct pdf_obj * obj , enum pdf_flag flag )
2010-07-30 14:23:10 +03:00
{
2018-12-03 12:40:13 -05:00
const char * s = " " ;
2010-07-30 14:23:10 +03:00
pdf - > flags | = 1 < < flag ;
if ( ! cli_debug_flag )
2014-04-07 16:39:54 -04:00
return ;
2010-07-30 14:23:10 +03:00
switch ( flag ) {
2018-12-03 12:40:13 -05:00
case UNTERMINATED_OBJ_DICT :
s = " dictionary not terminated " ;
break ;
case ESCAPED_COMMON_PDFNAME :
/* like /JavaScript */
s = " escaped common pdfname " ;
break ;
case BAD_STREAM_FILTERS :
s = " duplicate stream filters " ;
break ;
case BAD_PDF_VERSION :
s = " bad pdf version " ;
break ;
case BAD_PDF_HEADERPOS :
s = " bad pdf header position " ;
break ;
case BAD_PDF_TRAILER :
s = " bad pdf trailer " ;
break ;
case BAD_PDF_TOOMANYOBJS :
s = " too many pdf objs " ;
break ;
case BAD_FLATE :
s = " bad deflate stream " ;
break ;
case BAD_FLATESTART :
s = " bad deflate stream start " ;
break ;
case BAD_STREAMSTART :
s = " bad stream start " ;
break ;
case UNKNOWN_FILTER :
s = " unknown filter used " ;
break ;
case BAD_ASCIIDECODE :
s = " bad ASCII decode " ;
break ;
case HEX_JAVASCRIPT :
s = " hex javascript " ;
break ;
case BAD_INDOBJ :
s = " referencing nonexistent obj " ;
break ;
case HAS_OPENACTION :
s = " has /OpenAction " ;
break ;
case HAS_LAUNCHACTION :
s = " has /LaunchAction " ;
break ;
case BAD_STREAMLEN :
s = " bad /Length, too small " ;
break ;
case ENCRYPTED_PDF :
s = " PDF is encrypted " ;
break ;
case LINEARIZED_PDF :
s = " linearized PDF " ;
break ;
case MANY_FILTERS :
s = " more than 2 filters per obj " ;
break ;
case DECRYPTABLE_PDF :
s = " decryptable PDF " ;
break ;
}
cli_dbgmsg ( " pdfobj_flag: %s flagged in object %u %u \n " , s , obj - > id > > 8 , obj - > id & 0xff ) ;
2010-07-30 14:23:10 +03:00
}
2014-06-25 13:36:30 -04:00
struct pdf_obj * find_obj ( struct pdf_struct * pdf , struct pdf_obj * obj , uint32_t objid )
2010-05-11 10:37:10 +03:00
{
2014-06-10 13:21:31 -04:00
uint32_t j ;
uint32_t i ;
2010-05-11 13:33:07 +03:00
/* search starting at previous obj (if exists) */
2018-08-14 14:00:31 -07:00
for ( i = 0 ; i < pdf - > nobjs ; i + + ) {
if ( pdf - > objs [ i ] = = obj )
break ;
}
2014-04-07 16:39:54 -04:00
2018-08-14 14:00:31 -07:00
for ( j = i ; j < pdf - > nobjs ; j + + ) {
obj = pdf - > objs [ j ] ;
2014-04-07 16:39:54 -04:00
if ( obj - > id = = objid )
return obj ;
2010-05-11 10:37:10 +03:00
}
2014-04-07 16:39:54 -04:00
2010-05-11 10:37:10 +03:00
/* restart search from beginning if not found */
2018-08-14 14:00:31 -07:00
for ( j = 0 ; j < i ; j + + ) {
obj = pdf - > objs [ j ] ;
2014-04-07 16:39:54 -04:00
if ( obj - > id = = objid )
return obj ;
2010-05-11 10:37:10 +03:00
}
2014-04-07 16:39:54 -04:00
2010-05-11 10:37:10 +03:00
return NULL ;
}
2018-08-14 14:00:31 -07:00
/**
* @ brief Find and interpret the " /Length " dictionary key value .
2019-03-05 21:15:41 -05:00
*
2018-08-14 14:00:31 -07:00
* The value may be :
2019-03-05 21:15:41 -05:00
* - a direct object ( i . e . just a number )
2018-08-14 14:00:31 -07:00
* - an indirect object , where the value is somewhere else in the document and we have to look it up .
* Indirect objects are referenced using an object id ( objid ) , a generation id ( genid ) , and the letter ' R ' .
2019-03-05 21:15:41 -05:00
*
2018-08-14 14:00:31 -07:00
* Example dictionary with a single key " /Length " that relies on a direct object for the value .
2019-03-05 21:15:41 -05:00
*
2018-08-14 14:00:31 -07:00
* 1 0 obj
* < < / Length 534
* / Filter [ / ASCII85Decode / LZWDecode ]
* > >
* stream
* J . . ) 6 T ` ? p & < ! J9 % _ [ umg " B7/Z7KNXbN'S+,*Q/& " OLT ' FLIDK # ! n ` $ " <Atdi` \ Vn%b%)&'cA*VnK \ CJY(sF>c!Jnl@
* RM ] WM ; jjH6Gnc75idkL5 ] + cPZKEBPWdR > FF ( kj1_R % W_d & / jS ! ; iuad7h ? [ L - F $ + ] ] 0 A3Ck * $ I0KZ ? ; < ) CJtqi65Xb
* Vc3 \ n5ua : Q / = 0 $ W < # N3U ; H , MQKqfg1 ? : lUpR ; 6 oN [ C2E4ZNr8Udn . ' p + ? # X + 1 > 0 Kuk $ bCDF / ( 3f L5 ] Oq ) ^ kJZ ! C2H1
* ' TO ] Rl ? Q : & ' < 5 & iP ! $ Rq ; BXRecDN [ IJB ` , ) o8XJOSJ9sDS ] hQ ; Rj @ ! ND ) bD_q & C \ g : inYC % ) & u # : u , M6Bm % IY ! Kb1 +
* " :aAa'S`ViJglLb8<W9k6Yl \\ 0McJQkDeLWdPN?9A'jX*al>iG1p&i;eVoK&juJHs9%;Xomop " 5 KatWRT " JQ#qYuL,
* JD ? M $ 0 QP ) lKn06l1apKDC @ \ qJ4B ! ! ( 5 m + j .7F 790 m ( Vj88l8Q : _CZ ( Gm1 % X \ N1 & u ! FKHMB ~ >
* endstream
* endobj
2019-03-05 21:15:41 -05:00
*
2018-08-14 14:00:31 -07:00
* Example dictionary with a single key " /Length " that relies on an indirect object for the value .
2019-03-05 21:15:41 -05:00
*
2018-08-14 14:00:31 -07:00
* 7 0 obj
* < < / Length 8 0 R > > % An indirect reference to object 8 , with generation id 0.
* stream
* BT
* / F1 12 Tf
* 72 712 Td
* ( A stream with an indirect length ) Tj
* ET
* endstream
* endobj
2019-03-05 21:15:41 -05:00
*
2018-08-14 14:00:31 -07:00
* 8 0 obj
* 77 % The length of the preceding stream
* endobj
2019-03-05 21:15:41 -05:00
*
2018-08-14 14:00:31 -07:00
* @ param pdf Pdf context structure .
* @ param obj Pdf object context structure .
* @ param dict_start Pointer to the start of the dictionary string .
* @ param dict_len Remaining length of the dictionary string in bytes .
* @ return size_t Unsigned integer value of the " /Length " key
*/
static size_t find_length ( struct pdf_struct * pdf , struct pdf_obj * obj , const char * dict_start , size_t dict_len )
2010-05-11 10:37:10 +03:00
{
2019-01-22 14:15:46 -05:00
size_t length = 0 ;
const char * obj_start = dict_start ;
size_t bytes_remaining = dict_len ;
long temp_long = 0 ;
2018-08-14 14:00:31 -07:00
const char * index ;
2014-04-07 16:39:54 -04:00
2018-08-14 14:00:31 -07:00
if ( bytes_remaining < 8 ) {
2014-04-07 16:39:54 -04:00
return 0 ;
2018-08-14 14:00:31 -07:00
}
2014-04-07 16:39:54 -04:00
2018-08-14 14:00:31 -07:00
/*
* Find the " /Length " dictionary key
*/
index = cli_memstr ( obj_start , bytes_remaining , " /Length " , 7 ) ;
if ( ! index )
2014-04-07 16:39:54 -04:00
return 0 ;
2018-08-14 14:00:31 -07:00
if ( bytes_remaining < 1 ) {
2018-06-02 20:58:35 -04:00
return 0 ;
}
2018-08-14 14:00:31 -07:00
/* Step the index into the "/Length" string. */
index + + ;
bytes_remaining - = index - obj_start ;
/* Find the start of the next direct or indirect object.
* pdf_nextobject ( ) assumes we started searching from within a previous object */
obj_start = pdf_nextobject ( index , bytes_remaining ) ;
if ( ! obj_start )
return 0 ;
2019-03-05 21:15:41 -05:00
if ( bytes_remaining < ( size_t ) ( obj_start - index ) ) {
2018-08-14 14:00:31 -07:00
return 0 ;
}
bytes_remaining - = obj_start - index ;
index = obj_start ;
2018-12-03 12:40:13 -05:00
2018-08-14 14:00:31 -07:00
/* Read the value. This could either be the direct length value,
or the object id of the indirect object that has the length */
2019-01-22 14:15:46 -05:00
if ( CL_SUCCESS ! = cli_strntol_wrap ( index , bytes_remaining , 0 , 10 , & temp_long ) ) {
cli_dbgmsg ( " find_length: failed to parse object length or objid \n " ) ;
return 0 ;
} else if ( temp_long < 0 ) {
cli_dbgmsg ( " find_length: Encountered invalid negative object length or objid (%ld). \n " , temp_long ) ;
2018-08-14 14:00:31 -07:00
return 0 ;
}
2019-01-22 14:15:46 -05:00
length = ( size_t ) temp_long ; /* length or maybe object id */
2018-08-14 14:00:31 -07:00
2019-03-05 21:15:41 -05:00
/*
* Keep parsing , skipping past the first integer that might have been what we wanted .
* If it ' s an indirect object , we ' ll find a Generation ID followed by the letter ' R '
* I . e . something like " 0 R "
2018-08-14 14:00:31 -07:00
*/
while ( ( bytes_remaining > 0 ) & & isdigit ( * index ) ) {
index + + ;
bytes_remaining - - ;
2018-06-01 14:23:25 -04:00
}
2014-04-07 16:39:54 -04:00
2018-08-14 14:00:31 -07:00
if ( ( bytes_remaining > 0 ) & & ( * index = = ' ' ) ) {
2018-06-02 20:58:35 -04:00
unsigned long genid ;
2018-08-14 14:00:31 -07:00
index + + ;
bytes_remaining - - ;
2019-01-22 14:15:46 -05:00
if ( CL_SUCCESS ! = cli_strntol_wrap ( index , bytes_remaining , 0 , 10 , & temp_long ) ) {
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " find_length: failed to parse object genid \n " ) ;
2018-06-02 20:58:35 -04:00
return 0 ;
2019-01-22 14:15:46 -05:00
} else if ( temp_long < 0 ) {
cli_dbgmsg ( " find_length: Encountered invalid negative object genid (%ld). \n " , temp_long ) ;
return 0 ;
2018-06-02 20:58:35 -04:00
}
2019-01-22 14:15:46 -05:00
genid = ( unsigned long ) temp_long ;
2014-04-07 16:39:54 -04:00
2018-12-03 12:40:13 -05:00
while ( ( bytes_remaining > 0 ) & & isdigit ( * index ) ) {
2018-08-14 14:00:31 -07:00
index + + ;
bytes_remaining - - ;
}
if ( bytes_remaining < 2 ) {
return 0 ;
2018-06-01 14:23:25 -04:00
}
2014-04-07 16:39:54 -04:00
2018-08-14 14:00:31 -07:00
if ( index [ 0 ] = = ' ' & & index [ 1 ] = = ' R ' ) {
2019-03-05 21:15:41 -05:00
/*
* Ok so we found a genid and that ' R ' . Which means that first value
2018-08-14 14:00:31 -07:00
* was actually the objid .
* We can look up the indirect object using this information .
*/
2018-12-03 12:40:13 -05:00
unsigned long objid = length ;
const char * indirect_obj_start = NULL ;
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " find_length: length is in indirect object %lu %lu \n " , objid , genid ) ;
2014-04-07 16:39:54 -04:00
2018-12-03 12:40:13 -05:00
obj = find_obj ( pdf , obj , ( length < < 8 ) | ( genid & 0xff ) ) ;
2014-04-07 16:39:54 -04:00
if ( ! obj ) {
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " find_length: indirect object not found \n " ) ;
2014-04-07 16:39:54 -04:00
return 0 ;
}
2018-08-14 14:00:31 -07:00
indirect_obj_start = pdf - > map + obj - > start ;
2018-12-03 12:40:13 -05:00
bytes_remaining = pdf - > size - obj - > start ;
2018-08-14 14:00:31 -07:00
/* Ok so we found the indirect object, lets read the value. */
index = pdf_nextobject ( indirect_obj_start , bytes_remaining ) ;
if ( ! index ) {
cli_dbgmsg ( " find_length: next object not found \n " ) ;
2014-04-07 16:39:54 -04:00
return 0 ;
}
2018-12-03 12:40:13 -05:00
2019-03-05 21:15:41 -05:00
if ( bytes_remaining < ( size_t ) ( index - indirect_obj_start ) ) {
2018-08-14 14:00:31 -07:00
return 0 ;
}
bytes_remaining - = index - indirect_obj_start ;
2014-04-07 16:39:54 -04:00
2019-01-22 14:15:46 -05:00
/* Found the value, so lets parse it as a long, but prohibit negative lengths. */
if ( CL_SUCCESS ! = cli_strntol_wrap ( index , bytes_remaining , 0 , 10 , & temp_long ) ) {
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " find_length: failed to parse object length from indirect object \n " ) ;
2018-06-02 20:58:35 -04:00
return 0 ;
2019-01-22 14:15:46 -05:00
} else if ( temp_long < 0 ) {
cli_dbgmsg ( " find_length: Encountered invalid negative obj length (%ld). \n " , temp_long ) ;
return 0 ;
2018-06-02 20:58:35 -04:00
}
2019-01-22 14:15:46 -05:00
length = ( size_t ) temp_long ;
2014-04-07 16:39:54 -04:00
}
2010-05-11 10:37:10 +03:00
}
2014-04-07 16:39:54 -04:00
2010-05-11 13:22:45 +03:00
/* limit length */
2019-03-05 21:15:41 -05:00
if ( ( size_t ) ( obj_start - pdf - > map ) + length + 5 > pdf - > size )
2018-08-14 14:00:31 -07:00
length = pdf - > size - ( obj_start - pdf - > map ) - 5 ;
2014-04-07 16:39:54 -04:00
2010-05-11 10:37:10 +03:00
return length ;
}
2011-12-23 17:40:22 +02:00
/*
 * Bitmask of pdf_obj flag bits tested by pdf_extract_obj(): an object that is
 * not a stream, or that is a stream with filters, is not dumped unless at
 * least one of these bits is set in obj->flags.
 */
# define DUMP_MASK ((1 << OBJ_CONTENTS) | (1 << OBJ_FILTER_FLATE) | (1 << OBJ_FILTER_DCT) | (1 << OBJ_FILTER_AH) | (1 << OBJ_FILTER_A85) | (1 << OBJ_EMBEDDED_FILE) | (1 << OBJ_JAVASCRIPT) | (1 << OBJ_OPENACTION) | (1 << OBJ_LAUNCHACTION))
2010-05-11 11:26:35 +03:00
2014-04-07 16:39:54 -04:00
static int run_pdf_hooks ( struct pdf_struct * pdf , enum pdf_phase phase , int fd , int dumpid )
2010-08-02 15:42:58 +03:00
{
int ret ;
struct cli_bc_ctx * bc_ctx ;
cli_ctx * ctx = pdf - > ctx ;
fmap_t * map ;
2014-07-10 18:11:49 -04:00
UNUSEDPARAM ( dumpid ) ;
2010-08-02 15:42:58 +03:00
bc_ctx = cli_bytecode_context_alloc ( ) ;
if ( ! bc_ctx ) {
2018-08-14 14:00:31 -07:00
cli_errmsg ( " run_pdf_hooks: can't allocate memory for bc_ctx \n " ) ;
2014-04-07 16:39:54 -04:00
return CL_EMEM ;
2010-08-02 15:42:58 +03:00
}
map = * ctx - > fmap ;
if ( fd ! = - 1 ) {
2014-04-07 16:39:54 -04:00
map = fmap ( fd , 0 , 0 ) ;
if ( ! map ) {
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " run_pdf_hooks: can't mmap pdf extracted obj \n " ) ;
2014-04-07 16:39:54 -04:00
map = * ctx - > fmap ;
2018-12-03 12:40:13 -05:00
fd = - 1 ;
2014-04-07 16:39:54 -04:00
}
2010-08-02 15:42:58 +03:00
}
2014-04-07 16:39:54 -04:00
cli_bytecode_context_setpdf ( bc_ctx , phase , pdf - > nobjs , pdf - > objs , & pdf - > flags , pdf - > size , pdf - > startoff ) ;
2010-08-02 15:42:58 +03:00
cli_bytecode_context_setctx ( bc_ctx , ctx ) ;
2012-10-18 14:12:58 -07:00
ret = cli_bytecode_runhook ( ctx , ctx - > engine , bc_ctx , BC_PDF , map ) ;
2010-08-02 15:42:58 +03:00
cli_bytecode_context_destroy ( bc_ctx ) ;
2014-04-07 16:39:54 -04:00
if ( fd ! = - 1 )
funmap ( map ) ;
2010-08-02 15:42:58 +03:00
return ret ;
}
2011-12-15 13:27:31 +02:00
static void dbg_printhex ( const char * msg , const char * hex , unsigned len ) ;
2014-07-10 18:11:49 -04:00
2017-08-16 17:31:45 -04:00
/**
 * @brief Decrypt AES data in cipher-block-chaining fashion into `q`.
 *
 * Each 16-byte block is decrypted with rijndaelDecrypt() and XOR-ed with the
 * previous ciphertext block (or with the IV for the first block). Any
 * trailing partial block is ignored.
 *
 * When `has_iv` is set, the first 16 input bytes are taken as the IV and the
 * last decrypted byte is interpreted as a padding count, which is stripped
 * from the reported length after checking the padding bytes.
 *
 * The function returns without touching `q` or `*length` when the key is
 * longer than 32 bytes, the input is shorter than 32 bytes, or key setup
 * reports zero rounds.
 *
 * @param in       Input data (IV first when has_iv is set).
 * @param length   In: number of input bytes. Out: number of plaintext bytes
 *                 written to q that should be used.
 * @param q        Output buffer for the decrypted blocks.
 * @param key      Key bytes.
 * @param key_n    Key length in bytes (passed on as key_n * 8).
 * @param has_iv   Non-zero when the input starts with an IV and ends with padding.
 */
static void aes_decrypt(const unsigned char *in, size_t *length, unsigned char *q, char *key, unsigned key_n, int has_iv)
{
    unsigned long round_keys[RKLENGTH(256)];
    unsigned char chain[16];
    size_t remaining = *length;
    int nrounds;

    cli_dbgmsg("aes_decrypt: key length: %d, data length: %zu\n", key_n, *length);

    if (key_n > 32) {
        cli_dbgmsg("aes_decrypt: key length is %d!\n", key_n * 8);
        return;
    }

    if (remaining < 32) {
        cli_dbgmsg("aes_decrypt: len is <32: %zu\n", remaining);
        noisy_warnmsg("aes_decrypt: len is <32: %zu\n", remaining);
        return;
    }

    /* Seed the chaining block: either the leading IV or all zeroes. */
    if (has_iv) {
        memcpy(chain, in, 16);
        in += 16;
        remaining -= 16;
    } else {
        memset(chain, 0, sizeof(chain));
    }

    cli_dbgmsg("aes_decrypt: Calling rijndaelSetupDecrypt\n");
    nrounds = rijndaelSetupDecrypt(round_keys, (const unsigned char *)key, key_n * 8);
    if (!nrounds) {
        cli_dbgmsg("aes_decrypt: nrounds = 0\n");
        return;
    }

    cli_dbgmsg("aes_decrypt: Beginning rijndaelDecrypt\n");

    for (; remaining >= 16; remaining -= 16, in += 16, q += 16) {
        unsigned b;

        rijndaelDecrypt(round_keys, nrounds, in, q);
        for (b = 0; b < 16; b++)
            q[b] ^= chain[b];

        /* The ciphertext block just consumed chains into the next one. */
        memcpy(chain, in, 16);
    }

    /* From here on `remaining` counts the input bytes that are NOT plaintext. */
    if (has_iv) {
        unsigned char pad = q[-1]; /* last decrypted byte is the padding count */
        unsigned char k;

        remaining += 16; /* account for the IV */

        if (pad > 0x10) {
            cli_dbgmsg("aes_decrypt: bad pad: %x (extra len: %zu)\n", pad, remaining - 16);
            noisy_warnmsg("aes_decrypt: bad pad: %x (extra len: %zu)\n", pad, remaining - 16);
            *length -= remaining;
            return;
        }

        /* Step back to the start of the padding and verify its bytes. */
        q -= pad;
        for (k = 1; k < pad; k++) {
            if (q[k] != pad) {
                cli_dbgmsg("aes_decrypt: bad pad: %x != %x\n", q[k], pad);
                noisy_warnmsg("aes_decrypt: bad pad: %x != %x\n", q[k], pad);
                *length -= remaining;
                return;
            }
        }

        remaining += pad;
    }

    *length -= remaining;

    cli_dbgmsg("aes_decrypt: length is %zu\n", *length);
}
2017-08-16 17:31:45 -04:00
char * decrypt_any ( struct pdf_struct * pdf , uint32_t id , const char * in , size_t * length , enum enc_method enc_method )
2011-12-14 15:43:14 +02:00
{
unsigned char * key , * q , result [ 16 ] ;
unsigned n ;
struct arc4_state arc4 ;
2011-12-28 19:05:57 +02:00
if ( ! length | | ! * length | | ! in ) {
2018-12-03 12:40:13 -05:00
noisy_warnmsg ( " decrypt_any: decrypt failed for obj %u %u \n " , id > > 8 , id & 0xff ) ;
2014-04-07 16:39:54 -04:00
return NULL ;
2011-12-28 19:05:57 +02:00
}
2014-04-07 16:39:54 -04:00
2011-12-14 15:43:14 +02:00
n = pdf - > keylen + 5 ;
2011-12-15 17:24:36 +02:00
if ( enc_method = = ENC_AESV2 )
2014-04-07 16:39:54 -04:00
n + = 4 ;
2011-12-14 15:43:14 +02:00
key = cli_malloc ( n ) ;
2011-12-28 19:05:57 +02:00
if ( ! key ) {
2014-04-07 16:39:54 -04:00
noisy_warnmsg ( " decrypt_any: malloc failed \n " ) ;
return NULL ;
2011-12-28 19:05:57 +02:00
}
2011-12-14 15:43:14 +02:00
memcpy ( key , pdf - > key , pdf - > keylen ) ;
2018-12-03 12:40:13 -05:00
q = key + pdf - > keylen ;
2011-12-14 15:43:14 +02:00
* q + + = id > > 8 ;
* q + + = id > > 16 ;
* q + + = id > > 24 ;
* q + + = id ;
* q + + = 0 ;
2011-12-15 17:24:36 +02:00
if ( enc_method = = ENC_AESV2 )
2014-04-07 16:39:54 -04:00
memcpy ( q , " sAlT " , 4 ) ;
2014-02-08 00:31:12 -05:00
cl_hash_data ( " md5 " , key , n , result , NULL ) ;
2011-12-15 13:27:31 +02:00
free ( key ) ;
2011-12-14 15:43:14 +02:00
n = pdf - > keylen + 5 ;
if ( n > 16 )
2014-04-07 16:39:54 -04:00
n = 16 ;
2011-12-14 15:43:14 +02:00
2016-01-27 14:17:47 -05:00
q = cli_calloc ( * length , sizeof ( char ) ) ;
2011-12-28 19:05:57 +02:00
if ( ! q ) {
2014-04-07 16:39:54 -04:00
noisy_warnmsg ( " decrypt_any: malloc failed \n " ) ;
return NULL ;
2011-12-28 19:05:57 +02:00
}
2011-12-14 15:43:14 +02:00
2011-12-15 17:24:36 +02:00
switch ( enc_method ) {
2018-12-03 12:40:13 -05:00
case ENC_V2 :
cli_dbgmsg ( " cli_pdf: enc is v2 \n " ) ;
memcpy ( q , in , * length ) ;
arc4_init ( & arc4 , result , n ) ;
arc4_apply ( & arc4 , q , ( unsigned ) * length ) ; /* TODO: may truncate for very large lengths */
2014-04-07 16:39:54 -04:00
2018-12-03 12:40:13 -05:00
noisy_msg ( pdf , " decrypt_any: decrypted ARC4 data \n " ) ;
2014-04-07 16:39:54 -04:00
2018-12-03 12:40:13 -05:00
break ;
case ENC_AESV2 :
cli_dbgmsg ( " cli_pdf: enc is aesv2 \n " ) ;
aes_decrypt ( ( const unsigned char * ) in , length , q , ( char * ) result , n , 1 ) ;
2014-04-07 16:39:54 -04:00
2018-12-03 12:40:13 -05:00
noisy_msg ( pdf , " decrypt_any: decrypted AES(v2) data \n " ) ;
2014-04-07 16:39:54 -04:00
2018-12-03 12:40:13 -05:00
break ;
case ENC_AESV3 :
cli_dbgmsg ( " decrypt_any: enc is aesv3 \n " ) ;
if ( pdf - > keylen = = 0 ) {
cli_dbgmsg ( " decrypt_any: no key \n " ) ;
return NULL ;
}
2014-04-07 16:39:54 -04:00
2018-12-03 12:40:13 -05:00
aes_decrypt ( ( const unsigned char * ) in , length , q , pdf - > key , pdf - > keylen , 1 ) ;
2014-04-07 16:39:54 -04:00
2018-12-03 12:40:13 -05:00
noisy_msg ( pdf , " decrypted AES(v3) data \n " ) ;
2014-04-07 16:39:54 -04:00
2018-12-03 12:40:13 -05:00
break ;
case ENC_IDENTITY :
cli_dbgmsg ( " decrypt_any: enc is identity \n " ) ;
memcpy ( q , in , * length ) ;
2014-04-07 16:39:54 -04:00
2018-12-03 12:40:13 -05:00
noisy_msg ( pdf , " decrypt_any: identity encryption \n " ) ;
2014-04-07 16:39:54 -04:00
2018-12-03 12:40:13 -05:00
break ;
case ENC_NONE :
cli_dbgmsg ( " decrypt_any: enc is none \n " ) ;
noisy_msg ( pdf , " encryption is none \n " ) ;
free ( q ) ;
return NULL ;
case ENC_UNKNOWN :
cli_dbgmsg ( " decrypt_any: enc is unknown \n " ) ;
free ( q ) ;
noisy_warnmsg ( " decrypt_any: unknown encryption method for obj %u %u \n " ,
id > > 8 , id & 0xff ) ;
return NULL ;
2011-12-14 15:43:14 +02:00
}
2014-04-07 16:39:54 -04:00
2014-07-10 18:11:49 -04:00
return ( char * ) q ;
2011-12-14 15:43:14 +02:00
}
2015-03-20 15:10:52 -04:00
enum enc_method get_enc_method ( struct pdf_struct * pdf , struct pdf_obj * obj )
2011-12-15 17:24:36 +02:00
{
if ( obj - > flags & ( 1 < < OBJ_EMBEDDED_FILE ) )
2014-04-07 16:39:54 -04:00
return pdf - > enc_method_embeddedfile ;
2011-12-15 17:24:36 +02:00
if ( obj - > flags & ( 1 < < OBJ_STREAM ) )
2014-04-07 16:39:54 -04:00
return pdf - > enc_method_stream ;
2011-12-15 17:24:36 +02:00
return pdf - > enc_method_string ;
}
2011-12-23 17:40:22 +02:00
/* Scanner state kept across calls to process(). */
enum cstate {
/* Initial state: waiting for a '[' byte; other input is skipped line by line. */
CSTATE_NONE ,
/* A '[' has been seen: waiting for a '(' byte. */
CSTATE_TJ ,
/* A '(' has been seen: bytes are fed to the text normalizer until a ')' byte. */
CSTATE_TJ_PAROPEN
} ;
static void process ( struct text_norm_state * s , enum cstate * st , const char * buf , int length , int fout )
{
do {
2014-04-07 16:39:54 -04:00
switch ( * st ) {
2018-12-03 12:40:13 -05:00
case CSTATE_NONE :
if ( * buf = = ' [ ' ) {
* st = CSTATE_TJ ;
} else {
const char * nl = memchr ( buf , ' \n ' , length ) ;
if ( ! nl )
return ;
2014-04-07 16:39:54 -04:00
2018-12-03 12:40:13 -05:00
length - = nl - buf ;
buf = nl ;
}
2014-04-07 16:39:54 -04:00
2018-12-03 12:40:13 -05:00
break ;
case CSTATE_TJ :
if ( * buf = = ' ( ' )
* st = CSTATE_TJ_PAROPEN ;
2014-04-07 16:39:54 -04:00
2018-12-03 12:40:13 -05:00
break ;
case CSTATE_TJ_PAROPEN :
if ( * buf = = ' ) ' ) {
* st = CSTATE_TJ ;
} else {
if ( text_normalize_buffer ( s , ( const unsigned char * ) buf , 1 ) ! = 1 ) {
cli_writen ( fout , s - > out , s - > out_pos ) ;
text_normalize_reset ( s ) ;
}
2014-04-07 16:39:54 -04:00
}
2018-12-03 12:40:13 -05:00
break ;
2014-04-07 16:39:54 -04:00
}
buf + + ;
length - - ;
2011-12-23 17:40:22 +02:00
} while ( length > 0 ) ;
}
static int pdf_scan_contents ( int fd , struct pdf_struct * pdf )
{
struct text_norm_state s ;
char fullname [ 1024 ] ;
char outbuff [ BUFSIZ ] ;
char inbuf [ BUFSIZ ] ;
2018-08-14 14:00:31 -07:00
int fout , n ;
cl_error_t rc ;
2011-12-23 17:40:22 +02:00
enum cstate st = CSTATE_NONE ;
2018-12-03 12:40:13 -05:00
snprintf ( fullname , sizeof ( fullname ) , " %s " PATHSEP " pdf%02u_c " , pdf - > dir , ( pdf - > files - 1 ) ) ;
fout = open ( fullname , O_RDWR | O_CREAT | O_EXCL | O_TRUNC | O_BINARY , 0600 ) ;
2011-12-23 17:40:22 +02:00
if ( fout < 0 ) {
2014-04-07 16:39:54 -04:00
char err [ 128 ] ;
2018-08-14 14:00:31 -07:00
cli_errmsg ( " pdf_scan_contents: can't create temporary file %s: %s \n " , fullname , cli_strerror ( errno , err , sizeof ( err ) ) ) ;
2014-04-07 16:39:54 -04:00
return CL_ETMPFILE ;
2011-12-23 17:40:22 +02:00
}
2014-07-10 18:11:49 -04:00
text_normalize_init ( & s , ( unsigned char * ) outbuff , sizeof ( outbuff ) ) ;
2011-12-23 17:40:22 +02:00
while ( 1 ) {
2014-04-07 16:39:54 -04:00
n = cli_readn ( fd , inbuf , sizeof ( inbuf ) ) ;
if ( n < = 0 )
break ;
process ( & s , & st , inbuf , n , fout ) ;
2011-12-23 17:40:22 +02:00
}
2014-04-07 16:39:54 -04:00
2011-12-23 17:40:22 +02:00
cli_writen ( fout , s . out , s . out_pos ) ;
2011-12-28 19:05:57 +02:00
lseek ( fout , 0 , SEEK_SET ) ;
2018-07-30 20:19:28 -04:00
rc = cli_magic_scandesc ( fout , fullname , pdf - > ctx ) ;
2011-12-23 17:40:22 +02:00
close ( fout ) ;
2014-04-07 16:39:54 -04:00
2011-12-28 19:05:57 +02:00
if ( ! pdf - > ctx - > engine - > keeptmp )
2014-04-07 16:39:54 -04:00
if ( cli_unlink ( fullname ) & & rc ! = CL_VIRUS )
rc = CL_EUNLINK ;
2011-12-28 19:05:57 +02:00
return rc ;
2011-12-23 17:40:22 +02:00
}
2014-06-25 13:36:30 -04:00
int pdf_extract_obj ( struct pdf_struct * pdf , struct pdf_obj * obj , uint32_t flags )
2010-05-11 10:37:10 +03:00
{
2018-07-20 22:28:48 -04:00
cli_ctx * ctx = pdf - > ctx ;
2010-05-11 11:26:35 +03:00
char fullname [ NAME_MAX + 1 ] ;
2018-12-03 12:40:13 -05:00
int fout = - 1 ;
size_t sum = 0 ;
2018-08-14 14:00:31 -07:00
cl_error_t rc = CL_SUCCESS ;
2018-12-03 12:40:13 -05:00
int dump = 1 ;
2010-05-11 11:26:35 +03:00
2018-12-03 12:40:13 -05:00
cli_dbgmsg ( " pdf_extract_obj: obj %u %u \n " , obj - > id > > 8 , obj - > id & 0xff ) ;
2013-03-21 11:53:28 -04:00
2018-10-01 19:46:23 -04:00
if ( obj - > objstm ) {
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_extract_obj: extracting obj found in objstm. \n " ) ;
2018-10-01 19:46:23 -04:00
if ( obj - > objstm - > streambuf = = NULL ) {
cli_warnmsg ( " pdf_extract_obj: object in object stream has null stream buffer! \n " ) ;
return CL_EFORMAT ;
}
}
2018-08-14 14:00:31 -07:00
2010-08-01 22:14:44 +03:00
/* TODO: call bytecode hook here, allow override dumpability */
2014-04-07 16:39:54 -04:00
if ( ( ! ( obj - > flags & ( 1 < < OBJ_STREAM ) ) | | ( obj - > flags & ( 1 < < OBJ_HASFILTERS ) ) ) & & ! ( obj - > flags & DUMP_MASK ) ) {
/* don't dump all streams */
dump = 0 ;
2010-05-11 11:26:35 +03:00
}
2014-04-07 16:39:54 -04:00
if ( ( obj - > flags & ( 1 < < OBJ_IMAGE ) ) & & ! ( obj - > flags & ( 1 < < OBJ_FILTER_DCT ) ) ) {
/* don't dump / scan non-JPG images */
dump = 0 ;
2010-08-01 22:14:44 +03:00
}
2014-04-07 16:39:54 -04:00
2010-08-02 15:42:58 +03:00
if ( obj - > flags & ( 1 < < OBJ_FORCEDUMP ) ) {
2014-04-07 16:39:54 -04:00
/* bytecode can force dump by setting this flag */
dump = 1 ;
2010-08-02 15:42:58 +03:00
}
2014-04-07 16:39:54 -04:00
2010-08-02 15:42:58 +03:00
if ( ! dump )
2014-04-07 16:39:54 -04:00
return CL_CLEAN ;
2018-12-03 12:40:13 -05:00
cli_dbgmsg ( " pdf_extract_obj: dumping obj %u %u \n " , obj - > id > > 8 , obj - > id & 0xff ) ;
2014-04-07 16:39:54 -04:00
2018-12-03 12:40:13 -05:00
snprintf ( fullname , sizeof ( fullname ) , " %s " PATHSEP " pdf%02u " , pdf - > dir , pdf - > files + + ) ;
fout = open ( fullname , O_RDWR | O_CREAT | O_EXCL | O_TRUNC | O_BINARY , 0600 ) ;
2010-05-11 11:26:35 +03:00
if ( fout < 0 ) {
2014-04-07 16:39:54 -04:00
char err [ 128 ] ;
2018-08-14 14:00:31 -07:00
cli_errmsg ( " pdf_extract_obj: can't create temporary file %s: %s \n " , fullname , cli_strerror ( errno , err , sizeof ( err ) ) ) ;
2014-04-07 16:39:54 -04:00
return CL_ETMPFILE ;
2010-05-11 11:26:35 +03:00
}
2014-06-10 13:21:31 -04:00
if ( ! ( flags & PDF_EXTRACT_OBJ_SCAN ) )
obj - > path = strdup ( fullname ) ;
2019-03-05 21:15:41 -05:00
if ( ( NULL = = obj - > objstm ) & &
( obj - > flags & ( 1 < < OBJ_STREAM ) ) ) {
/*
* Object contains a stream . Parse this now .
*/
cli_dbgmsg ( " pdf_extract_obj: parsing a stream in obj %u %u \n " , obj - > id > > 8 , obj - > id & 0xff ) ;
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
const char * start = pdf - > map + obj - > start ;
2018-08-14 14:00:31 -07:00
2019-03-05 21:15:41 -05:00
size_t length ;
size_t orig_length ;
int dict_len = obj - > stream - start ; /* Dictionary should end where the stream begins */
2012-01-18 20:58:38 +02:00
2019-03-05 21:15:41 -05:00
const char * pstr ;
struct pdf_dict * dparams = NULL ;
struct objstm_struct * objstm = NULL ;
int xref = 0 ;
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
/* Find and interpret the length dictionary value */
length = find_length ( pdf , obj , start , dict_len ) ;
if ( length < 0 )
length = 0 ;
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
orig_length = length ;
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
if ( length > obj - > stream_size ) {
cli_dbgmsg ( " cli_pdf: Stream length exceeds object length by %zu bytes. Length truncated to %zu bytes \n " , length - obj - > stream_size , obj - > stream_size ) ;
noisy_warnmsg ( " Stream length exceeds object length by %zu bytes. Length truncated to %zu bytes \n " , length - obj - > stream_size , obj - > stream_size ) ;
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
length = obj - > stream_size ;
}
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
if ( ! ( obj - > flags & ( 1 < < OBJ_FILTER_FLATE ) ) & & ( length < = 0 ) ) {
/*
* If the length is unknown and this doesn ' t contain a FLATE encoded filter . . .
* Calculate the length using the stream size , and trimming
* off any newline / carriage returns from the end of the stream .
*/
const char * q = start + obj - > stream_size ;
length = obj - > stream_size ;
q - - ;
if ( * q = = ' \n ' ) {
q - - ;
length - - ;
if ( * q = = ' \r ' )
length - - ;
} else if ( * q = = ' \r ' ) {
length - - ;
}
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
if ( length < 0 )
length = 0 ;
cli_dbgmsg ( " pdf_extract_obj: calculated length %lld \n " , ( long long ) length ) ;
} else {
if ( obj - > stream_size > ( size_t ) length + 2 ) {
cli_dbgmsg ( " cli_pdf: calculated length %zu < %zu \n " ,
( size_t ) length , obj - > stream_size ) ;
length = obj - > stream_size ;
}
}
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
if ( ( 0 ! = orig_length ) & & ( obj - > stream_size > ( size_t ) orig_length + 20 ) ) {
cli_dbgmsg ( " pdf_extract_obj: orig length: %lld, length: %lld, size: %zu \n " ,
( long long ) orig_length , ( long long ) length , obj - > stream_size ) ;
pdfobj_flag ( pdf , obj , BAD_STREAMLEN ) ;
}
2016-04-18 17:11:12 -04:00
2019-03-05 21:15:41 -05:00
if ( 0 = = length ) {
length = obj - > stream_size ;
if ( 0 = = length ) {
cli_dbgmsg ( " pdf_extract_obj: Alleged or calculated stream length and stream buffer size both 0 \n " ) ;
goto done ; /* Empty stream, nothing to scan */
}
}
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
/* Check if XRef is enabled */
if ( cli_memstr ( start , dict_len , " /XRef " , strlen ( " /XRef " ) ) ) {
xref = 1 ;
}
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
cli_dbgmsg ( " -------------EXPERIMENTAL------------- \n " ) ;
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
/*
* Identify the DecodeParms , if available .
*/
if ( NULL ! = ( pstr = pdf_getdict ( start , & dict_len , " /DecodeParms " ) ) ) {
cli_dbgmsg ( " pdf_extract_obj: Found /DecodeParms \n " ) ;
} else if ( NULL ! = ( pstr = pdf_getdict ( start , & dict_len , " /DP " ) ) ) {
cli_dbgmsg ( " pdf_extract_obj: Found /DP \n " ) ;
}
2016-04-13 18:29:55 -04:00
2019-03-05 21:15:41 -05:00
if ( pstr ) {
/* shift pstr left to "<<" for pdf_parse_dict */
while ( ( * pstr = = ' < ' ) & & ( pstr > start ) ) {
pstr - - ;
dict_len + + ;
}
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
/* shift pstr right to "<<" for pdf_parse_dict */
while ( ( * pstr ! = ' < ' ) & & ( dict_len > 0 ) ) {
pstr + + ;
dict_len - - ;
}
if ( dict_len > 4 )
dparams = pdf_parse_dict ( pdf , obj , obj - > size , ( char * ) pstr , NULL ) ;
else
cli_dbgmsg ( " pdf_extract_obj: failed to locate DecodeParms dictionary start \n " ) ;
}
/*
* Go back to the start of the dictionary and check to see if the stream
* is an object stream . If so , collect the relevant info .
*/
dict_len = obj - > stream - start ;
if ( NULL ! = ( pstr = pdf_getdict ( start , & dict_len , " /Type/ObjStm " ) ) ) {
int32_t objstm_first = - 1 ;
int32_t objstm_length = - 1 ;
int32_t objstm_n = - 1 ;
cli_dbgmsg ( " pdf_extract_obj: Found /Type/ObjStm \n " ) ;
dict_len = obj - > stream - start ;
if ( ( - 1 = = ( objstm_first = pdf_readint ( start , dict_len , " /First " ) ) ) ) {
cli_warnmsg ( " pdf_extract_obj: Failed to find offset of first object in object stream \n " ) ;
} else if ( ( - 1 = = ( objstm_length = pdf_readint ( start , dict_len , " /Length " ) ) ) ) {
cli_warnmsg ( " pdf_extract_obj: Failed to find length of object stream \n " ) ;
} else if ( ( - 1 = = ( objstm_n = pdf_readint ( start , dict_len , " /N " ) ) ) ) {
cli_warnmsg ( " pdf_extract_obj: Failed to find num objects in object stream \n " ) ;
} else {
/* Add objstm to pdf struct, so it can be freed eventually */
pdf - > nobjstms + + ;
pdf - > objstms = cli_realloc2 ( pdf - > objstms , sizeof ( struct objstm_struct * ) * pdf - > nobjstms ) ;
if ( ! pdf - > objstms ) {
cli_warnmsg ( " pdf_extract_obj: out of memory parsing object stream (%u) \n " , pdf - > nobjstms ) ;
pdf_free_dict ( dparams ) ;
return CL_EMEM ;
2018-08-14 14:00:31 -07:00
}
2019-03-05 21:15:41 -05:00
objstm = malloc ( sizeof ( struct objstm_struct ) ) ;
if ( ! objstm ) {
cli_warnmsg ( " pdf_extract_obj: out of memory parsing object stream (%u) \n " , pdf - > nobjstms ) ;
pdf_free_dict ( dparams ) ;
return CL_EMEM ;
}
pdf - > objstms [ pdf - > nobjstms - 1 ] = objstm ;
2018-08-14 14:00:31 -07:00
2019-03-05 21:15:41 -05:00
memset ( objstm , 0 , sizeof ( * objstm ) ) ;
2018-08-14 14:00:31 -07:00
2019-03-05 21:15:41 -05:00
objstm - > first = ( uint32_t ) objstm_first ;
objstm - > current = ( uint32_t ) objstm_first ;
objstm - > current_pair = 0 ;
objstm - > length = ( uint32_t ) objstm_length ;
objstm - > n = ( uint32_t ) objstm_n ;
2018-08-14 14:00:31 -07:00
2019-03-05 21:15:41 -05:00
cli_dbgmsg ( " pdf_extract_obj: ObjStm first obj at offset %d \n " , objstm - > first ) ;
cli_dbgmsg ( " pdf_extract_obj: ObjStm length is %d bytes \n " , objstm - > length ) ;
cli_dbgmsg ( " pdf_extract_obj: ObjStm should contain %d objects \n " , objstm - > n ) ;
}
}
sum = pdf_decodestream ( pdf , obj , dparams , obj - > stream , ( uint32_t ) length , xref , fout , & rc , objstm ) ;
if ( ( CL_SUCCESS ! = rc ) & & ( CL_VIRUS ! = rc ) ) {
cli_dbgmsg ( " Error decoding stream! Error code: %d \n " , rc ) ;
/* It's ok if we couldn't decode the stream,
* make a best effort to keep parsing . */
if ( CL_EPARSE = = rc )
rc = CL_SUCCESS ;
2018-08-14 14:00:31 -07:00
2019-03-05 21:15:41 -05:00
if ( NULL ! = objstm ) {
/*
* If we were expecting an objstm and there was a failure . . .
* discard the memory for last object stream .
*/
if ( NULL ! = pdf - > objstms ) {
if ( NULL ! = pdf - > objstms [ pdf - > nobjstms - 1 ] ) {
if ( NULL ! = pdf - > objstms [ pdf - > nobjstms - 1 ] - > streambuf ) {
free ( pdf - > objstms [ pdf - > nobjstms - 1 ] - > streambuf ) ;
pdf - > objstms [ pdf - > nobjstms - 1 ] - > streambuf = NULL ;
}
free ( pdf - > objstms [ pdf - > nobjstms - 1 ] ) ;
pdf - > objstms [ pdf - > nobjstms - 1 ] = NULL ;
2018-08-14 14:00:31 -07:00
}
2019-03-05 21:15:41 -05:00
/* Pop the objstm off the end of the pdf->objstms array. */
if ( pdf - > nobjstms > 0 ) {
pdf - > nobjstms - - ;
if ( 0 = = pdf - > nobjstms ) {
free ( pdf - > objstms ) ;
pdf - > objstms = NULL ;
} else {
pdf - > objstms = cli_realloc2 ( pdf - > objstms , sizeof ( struct objstm_struct * ) * pdf - > nobjstms ) ;
2018-08-14 14:00:31 -07:00
2019-03-05 21:15:41 -05:00
if ( ! pdf - > objstms ) {
cli_warnmsg ( " pdf_extract_obj: out of memory when shrinking down objstm array \n " ) ;
return CL_EMEM ;
2018-08-14 14:00:31 -07:00
}
}
2019-03-05 21:15:41 -05:00
} else {
/* hm.. this shouldn't happen */
cli_warnmsg ( " pdf_extract_obj: Failure counting objstms. \n " ) ;
2018-08-14 14:00:31 -07:00
}
2014-04-07 16:39:54 -04:00
}
2019-03-05 21:15:41 -05:00
}
}
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
if ( dparams )
pdf_free_dict ( dparams ) ;
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
if ( ( rc = = CL_VIRUS ) & & ! SCAN_ALLMATCHES ) {
sum = 0 ; /* prevents post-filter scan */
goto done ;
}
2012-01-18 20:58:38 +02:00
2019-03-05 21:15:41 -05:00
cli_dbgmsg ( " -------------EXPERIMENTAL------------- \n " ) ;
2018-08-14 14:00:31 -07:00
2019-03-05 21:15:41 -05:00
} else if ( obj - > flags & ( 1 < < OBJ_JAVASCRIPT ) ) {
const char * q2 ;
const char * q = ( obj - > objstm ) ? ( const char * ) ( obj - > start + obj - > objstm - > streambuf )
: ( const char * ) ( obj - > start + pdf - > map ) ;
2018-08-14 14:00:31 -07:00
2019-03-05 21:15:41 -05:00
/* TODO: get obj-endobj size */
off_t bytesleft = obj - > size ;
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
if ( bytesleft < 0 ) {
goto done ;
}
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
do {
char * js = NULL ;
size_t js_len = 0 ;
const char * q3 ;
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
q2 = cli_memstr ( q , bytesleft , " /JavaScript " , 11 ) ;
if ( ! q2 )
break ;
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
bytesleft - = q2 - q + 11 ;
q = q2 + 11 ;
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
js = pdf_readstring ( q , bytesleft , " /JS " , NULL , & q2 , ! ( pdf - > flags & ( 1 < < DECRYPTABLE_PDF ) ) ) ;
bytesleft - = q2 - q ;
q = q2 ;
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
if ( js ) {
char * decrypted = NULL ;
const char * out = js ;
js_len = strlen ( js ) ;
if ( pdf - > flags & ( 1 < < DECRYPTABLE_PDF ) ) {
cli_dbgmsg ( " pdf_extract_obj: encrypted string \n " ) ;
decrypted = decrypt_any ( pdf , obj - > id , js , & js_len , pdf - > enc_method_string ) ;
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
if ( decrypted ) {
noisy_msg ( pdf , " pdf_extract_obj: decrypted Javascript string from obj %u %u \n " , obj - > id > > 8 , obj - > id & 0xff ) ;
out = decrypted ;
2014-04-07 16:39:54 -04:00
}
2019-03-05 21:15:41 -05:00
}
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
if ( filter_writen ( pdf , obj , fout , out , js_len , ( size_t * ) & sum ) ! = js_len ) {
rc = CL_EWRITE ;
2014-04-07 16:39:54 -04:00
free ( js ) ;
2019-03-05 21:15:41 -05:00
break ;
}
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
free ( decrypted ) ;
free ( js ) ;
cli_dbgmsg ( " pdf_extract_obj: bytesleft: %d \n " , ( int ) bytesleft ) ;
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
if ( bytesleft > 0 ) {
q2 = pdf_nextobject ( q , bytesleft ) ;
if ( ! q2 )
q2 = q + bytesleft - 1 ;
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
/* non-conforming PDFs that don't escape ) properly */
q3 = memchr ( q , ' ) ' , bytesleft ) ;
if ( q3 & & q3 < q2 )
q2 = q3 ;
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
while ( q2 > q & & q2 [ - 1 ] = = ' ' )
q2 - - ;
if ( q2 > q ) {
q - - ;
filter_writen ( pdf , obj , fout , q , q2 - q , ( size_t * ) & sum ) ;
q + + ;
2014-04-07 16:39:54 -04:00
}
}
2019-03-05 21:15:41 -05:00
}
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
} while ( bytesleft > 0 ) ;
} else {
off_t bytesleft = obj - > size ;
if ( bytesleft < 0 )
rc = CL_EFORMAT ;
else {
if ( obj - > objstm ) {
if ( filter_writen ( pdf , obj , fout , obj - > objstm - > streambuf + obj - > start , bytesleft , ( size_t * ) & sum ) ! = ( size_t ) bytesleft )
rc = CL_EWRITE ;
} else {
if ( filter_writen ( pdf , obj , fout , pdf - > map + obj - > start , bytesleft , ( size_t * ) & sum ) ! = ( size_t ) bytesleft )
rc = CL_EWRITE ;
2018-10-25 13:06:15 -07:00
}
2014-04-07 16:39:54 -04:00
}
2019-03-05 21:15:41 -05:00
}
done :
2014-04-07 16:39:54 -04:00
2018-12-03 12:40:13 -05:00
cli_dbgmsg ( " pdf_extract_obj: extracted %td bytes %u %u obj \n " , sum , obj - > id > > 8 , obj - > id & 0xff ) ;
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_extract_obj: ... to %s \n " , fullname ) ;
2014-04-07 16:39:54 -04:00
2014-06-10 13:21:31 -04:00
if ( flags & PDF_EXTRACT_OBJ_SCAN & & sum ) {
2014-04-07 16:39:54 -04:00
int rc2 ;
cli_updatelimits ( pdf - > ctx , sum ) ;
/* TODO: invoke bytecode on this pdf obj with metainformation associated */
lseek ( fout , 0 , SEEK_SET ) ;
2018-07-30 20:19:28 -04:00
rc2 = cli_magic_scandesc ( fout , fullname , pdf - > ctx ) ;
2014-04-07 16:39:54 -04:00
if ( rc2 = = CL_VIRUS | | rc = = CL_SUCCESS )
rc = rc2 ;
2018-07-20 22:28:48 -04:00
if ( ( rc = = CL_CLEAN ) | | ( ( rc = = CL_VIRUS ) & & SCAN_ALLMATCHES ) ) {
2018-08-14 14:00:31 -07:00
unsigned int dumpid = 0 ;
for ( dumpid = 0 ; dumpid < pdf - > nobjs ; dumpid + + ) {
if ( pdf - > objs [ dumpid ] = = obj )
break ;
}
rc2 = run_pdf_hooks ( pdf , PDF_PHASE_POSTDUMP , fout , dumpid ) ;
2014-04-07 16:39:54 -04:00
if ( rc2 = = CL_VIRUS )
rc = rc2 ;
}
2018-07-20 22:28:48 -04:00
if ( ( ( rc = = CL_CLEAN ) | | ( ( rc = = CL_VIRUS ) & & SCAN_ALLMATCHES ) ) & & ( obj - > flags & ( 1 < < OBJ_CONTENTS ) ) ) {
2014-04-07 16:39:54 -04:00
lseek ( fout , 0 , SEEK_SET ) ;
2018-12-03 12:40:13 -05:00
cli_dbgmsg ( " pdf_extract_obj: dumping contents %u %u \n " , obj - > id > > 8 , obj - > id & 0xff ) ;
2014-04-07 16:39:54 -04:00
rc2 = pdf_scan_contents ( fout , pdf ) ;
if ( rc2 = = CL_VIRUS )
rc = rc2 ;
2018-12-03 12:40:13 -05:00
noisy_msg ( pdf , " pdf_extract_obj: extracted text from obj %u %u \n " , obj - > id > > 8 , obj - > id & 0xff ) ;
2014-04-07 16:39:54 -04:00
}
2010-08-02 15:42:58 +03:00
}
2014-04-07 16:39:54 -04:00
2010-05-11 11:26:35 +03:00
close ( fout ) ;
2014-04-07 16:39:54 -04:00
2014-06-10 13:21:31 -04:00
if ( flags & PDF_EXTRACT_OBJ_SCAN & & ! pdf - > ctx - > engine - > keeptmp )
2014-04-07 16:39:54 -04:00
if ( cli_unlink ( fullname ) & & rc ! = CL_VIRUS )
rc = CL_EUNLINK ;
2010-05-11 10:37:10 +03:00
return rc ;
}
2010-05-10 23:41:34 +03:00
/*
 * Context tracked while the names inside an object's dictionary are
 * processed one by one (see handle_pdfname() and the pdfname_actions table).
 * A table entry fires its transition when its from_state equals the current
 * state, or when its from_state is STATE_ANY.
 */
enum objstate {
    STATE_NONE = 0,     /* no special context */
    STATE_S,            /* entered by the "S" name */
    STATE_FILTER,       /* entered by "Filter"; filter names keep it */
    STATE_JAVASCRIPT,   /* entered by "JavaScript" while in STATE_S */
    STATE_OPENACTION,   /* entered by "OpenAction" */
    STATE_LINEARIZED,   /* entered by "Linearized" */
    STATE_LAUNCHACTION, /* entered by "Launch" */
    STATE_CONTENTS,     /* entered by "Contents" */
    STATE_ANY           /* for actions table below */
};
2018-12-03 12:40:13 -05:00
/*
 * Bits for pdfname_action.nameflags.
 *
 * NAMEFLAG_HEURISTIC marks names for which handle_pdfname() raises
 * ESCAPED_COMMON_PDFNAME when the name was written with escapes.
 */
#define NAMEFLAG_NONE      0x0
#define NAMEFLAG_HEURISTIC 0x1
2014-07-30 14:20:45 -04:00
2010-05-10 23:41:34 +03:00
struct pdfname_action {
const char * pdfname ;
2018-12-03 12:40:13 -05:00
enum pdf_objflags set_objflag ; /* OBJ_DICT is noop */
enum objstate from_state ; /* STATE_NONE is noop */
2010-05-10 23:41:34 +03:00
enum objstate to_state ;
2014-07-30 14:20:45 -04:00
uint32_t nameflags ;
2014-07-08 19:53:41 -04:00
# if HAVE_JSON
2014-06-25 14:06:17 -04:00
void ( * pdf_stats_cb ) ( struct pdf_struct * pdf , struct pdf_obj * obj , struct pdfname_action * act ) ;
2014-07-08 19:53:41 -04:00
# endif
2010-05-10 23:41:34 +03:00
} ;
2014-07-08 19:53:41 -04:00
# if HAVE_JSON
2010-05-10 23:41:34 +03:00
static struct pdfname_action pdfname_actions [ ] = {
2014-07-30 14:20:45 -04:00
{ " ASCIIHexDecode " , OBJ_FILTER_AH , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC , ASCIIHexDecode_cb } ,
{ " ASCII85Decode " , OBJ_FILTER_A85 , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC , ASCII85Decode_cb } ,
{ " A85 " , OBJ_FILTER_A85 , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC , ASCII85Decode_cb } ,
{ " AHx " , OBJ_FILTER_AH , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC , ASCIIHexDecode_cb } ,
{ " EmbeddedFile " , OBJ_EMBEDDED_FILE , STATE_NONE , STATE_NONE , NAMEFLAG_HEURISTIC , EmbeddedFile_cb } ,
{ " FlateDecode " , OBJ_FILTER_FLATE , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC , FlateDecode_cb } ,
{ " Fl " , OBJ_FILTER_FLATE , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC , FlateDecode_cb } ,
{ " Image " , OBJ_IMAGE , STATE_NONE , STATE_NONE , NAMEFLAG_HEURISTIC , Image_cb } ,
{ " LZWDecode " , OBJ_FILTER_LZW , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC , LZWDecode_cb } ,
{ " LZW " , OBJ_FILTER_LZW , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC , LZWDecode_cb } ,
{ " RunLengthDecode " , OBJ_FILTER_RL , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC , RunLengthDecode_cb } ,
{ " RL " , OBJ_FILTER_RL , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC , RunLengthDecode_cb } ,
{ " CCITTFaxDecode " , OBJ_FILTER_FAX , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC , CCITTFaxDecode_cb } ,
{ " CCF " , OBJ_FILTER_FAX , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC , CCITTFaxDecode_cb } ,
{ " JBIG2Decode " , OBJ_FILTER_DCT , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC , JBIG2Decode_cb } ,
{ " DCTDecode " , OBJ_FILTER_DCT , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC , DCTDecode_cb } ,
{ " DCT " , OBJ_FILTER_DCT , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC , DCTDecode_cb } ,
{ " JPXDecode " , OBJ_FILTER_JPX , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC , JPXDecode_cb } ,
2018-12-03 12:40:13 -05:00
{ " Crypt " , OBJ_FILTER_CRYPT , STATE_FILTER , STATE_NONE , NAMEFLAG_HEURISTIC , Crypt_cb } ,
2014-07-30 14:20:45 -04:00
{ " Standard " , OBJ_FILTER_STANDARD , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC , Standard_cb } ,
2018-12-03 12:40:13 -05:00
{ " Sig " , OBJ_SIGNED , STATE_ANY , STATE_NONE , NAMEFLAG_HEURISTIC , Sig_cb } ,
{ " V " , OBJ_SIGNED , STATE_ANY , STATE_NONE , NAMEFLAG_HEURISTIC , NULL } ,
{ " R " , OBJ_SIGNED , STATE_ANY , STATE_NONE , NAMEFLAG_HEURISTIC , NULL } ,
2014-07-30 14:20:45 -04:00
{ " Linearized " , OBJ_DICT , STATE_NONE , STATE_LINEARIZED , NAMEFLAG_HEURISTIC , NULL } ,
{ " Filter " , OBJ_HASFILTERS , STATE_ANY , STATE_FILTER , NAMEFLAG_HEURISTIC , NULL } ,
{ " JavaScript " , OBJ_JAVASCRIPT , STATE_S , STATE_JAVASCRIPT , NAMEFLAG_HEURISTIC , JavaScript_cb } ,
{ " Length " , OBJ_DICT , STATE_FILTER , STATE_NONE , NAMEFLAG_HEURISTIC , NULL } ,
{ " S " , OBJ_DICT , STATE_NONE , STATE_S , NAMEFLAG_HEURISTIC , NULL } ,
{ " Type " , OBJ_DICT , STATE_NONE , STATE_NONE , NAMEFLAG_HEURISTIC , NULL } ,
{ " OpenAction " , OBJ_OPENACTION , STATE_ANY , STATE_OPENACTION , NAMEFLAG_HEURISTIC , OpenAction_cb } ,
{ " Launch " , OBJ_LAUNCHACTION , STATE_ANY , STATE_LAUNCHACTION , NAMEFLAG_HEURISTIC , Launch_cb } ,
{ " Page " , OBJ_PAGE , STATE_NONE , STATE_NONE , NAMEFLAG_HEURISTIC , Page_cb } ,
{ " Contents " , OBJ_CONTENTS , STATE_NONE , STATE_CONTENTS , NAMEFLAG_HEURISTIC , NULL } ,
{ " Author " , OBJ_DICT , STATE_NONE , STATE_NONE , NAMEFLAG_NONE , Author_cb } ,
{ " Producer " , OBJ_DICT , STATE_NONE , STATE_NONE , NAMEFLAG_NONE , Producer_cb } ,
{ " CreationDate " , OBJ_DICT , STATE_NONE , STATE_NONE , NAMEFLAG_NONE , CreationDate_cb } ,
{ " ModDate " , OBJ_DICT , STATE_NONE , STATE_NONE , NAMEFLAG_NONE , ModificationDate_cb } ,
{ " Creator " , OBJ_DICT , STATE_NONE , STATE_NONE , NAMEFLAG_NONE , Creator_cb } ,
{ " Title " , OBJ_DICT , STATE_NONE , STATE_NONE , NAMEFLAG_NONE , Title_cb } ,
{ " Keywords " , OBJ_DICT , STATE_NONE , STATE_NONE , NAMEFLAG_NONE , Keywords_cb } ,
{ " Subject " , OBJ_DICT , STATE_NONE , STATE_NONE , NAMEFLAG_NONE , Subject_cb } ,
{ " Pages " , OBJ_DICT , STATE_NONE , STATE_NONE , NAMEFLAG_NONE , Pages_cb } ,
{ " Colors " , OBJ_DICT , STATE_NONE , STATE_NONE , NAMEFLAG_NONE , Colors_cb } ,
{ " RichMedia " , OBJ_DICT , STATE_NONE , STATE_NONE , NAMEFLAG_NONE , RichMedia_cb } ,
{ " AcroForm " , OBJ_DICT , STATE_NONE , STATE_NONE , NAMEFLAG_NONE , AcroForm_cb } ,
2018-12-03 12:40:13 -05:00
{ " XFA " , OBJ_DICT , STATE_NONE , STATE_NONE , NAMEFLAG_NONE , XFA_cb } } ;
2014-07-08 19:53:41 -04:00
# else
static struct pdfname_action pdfname_actions [ ] = {
2014-07-30 14:20:45 -04:00
{ " ASCIIHexDecode " , OBJ_FILTER_AH , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC } ,
{ " ASCII85Decode " , OBJ_FILTER_A85 , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC } ,
{ " A85 " , OBJ_FILTER_A85 , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC } ,
{ " AHx " , OBJ_FILTER_AH , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC } ,
{ " EmbeddedFile " , OBJ_EMBEDDED_FILE , STATE_NONE , STATE_NONE , NAMEFLAG_HEURISTIC } ,
{ " FlateDecode " , OBJ_FILTER_FLATE , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC } ,
{ " Fl " , OBJ_FILTER_FLATE , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC } ,
{ " Image " , OBJ_IMAGE , STATE_NONE , STATE_NONE , NAMEFLAG_HEURISTIC } ,
{ " LZWDecode " , OBJ_FILTER_LZW , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC } ,
{ " LZW " , OBJ_FILTER_LZW , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC } ,
{ " RunLengthDecode " , OBJ_FILTER_RL , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC } ,
{ " RL " , OBJ_FILTER_RL , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC } ,
{ " CCITTFaxDecode " , OBJ_FILTER_FAX , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC } ,
{ " CCF " , OBJ_FILTER_FAX , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC } ,
{ " JBIG2Decode " , OBJ_FILTER_DCT , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC } ,
{ " DCTDecode " , OBJ_FILTER_DCT , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC } ,
{ " DCT " , OBJ_FILTER_DCT , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC } ,
{ " JPXDecode " , OBJ_FILTER_JPX , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC } ,
2018-12-03 12:40:13 -05:00
{ " Crypt " , OBJ_FILTER_CRYPT , STATE_FILTER , STATE_NONE , NAMEFLAG_HEURISTIC } ,
2014-07-30 14:20:45 -04:00
{ " Standard " , OBJ_FILTER_STANDARD , STATE_FILTER , STATE_FILTER , NAMEFLAG_HEURISTIC } ,
2018-12-03 12:40:13 -05:00
{ " Sig " , OBJ_SIGNED , STATE_ANY , STATE_NONE , NAMEFLAG_HEURISTIC } ,
{ " V " , OBJ_SIGNED , STATE_ANY , STATE_NONE , NAMEFLAG_HEURISTIC } ,
{ " R " , OBJ_SIGNED , STATE_ANY , STATE_NONE , NAMEFLAG_HEURISTIC } ,
2014-07-30 14:20:45 -04:00
{ " Linearized " , OBJ_DICT , STATE_NONE , STATE_LINEARIZED , NAMEFLAG_HEURISTIC } ,
{ " Filter " , OBJ_HASFILTERS , STATE_ANY , STATE_FILTER , NAMEFLAG_HEURISTIC } ,
{ " JavaScript " , OBJ_JAVASCRIPT , STATE_S , STATE_JAVASCRIPT , NAMEFLAG_HEURISTIC } ,
{ " Length " , OBJ_DICT , STATE_FILTER , STATE_NONE , NAMEFLAG_HEURISTIC } ,
{ " S " , OBJ_DICT , STATE_NONE , STATE_S , NAMEFLAG_HEURISTIC } ,
{ " Type " , OBJ_DICT , STATE_NONE , STATE_NONE , NAMEFLAG_HEURISTIC } ,
{ " OpenAction " , OBJ_OPENACTION , STATE_ANY , STATE_OPENACTION , NAMEFLAG_HEURISTIC } ,
{ " Launch " , OBJ_LAUNCHACTION , STATE_ANY , STATE_LAUNCHACTION , NAMEFLAG_HEURISTIC } ,
{ " Page " , OBJ_PAGE , STATE_NONE , STATE_NONE , NAMEFLAG_HEURISTIC } ,
2018-12-03 12:40:13 -05:00
{ " Contents " , OBJ_CONTENTS , STATE_NONE , STATE_CONTENTS , NAMEFLAG_HEURISTIC } } ;
2014-07-08 19:53:41 -04:00
# endif
2010-05-10 23:41:34 +03:00
2010-07-30 17:13:46 +03:00
/*
 * Mask of the obj->flags bits that stand for stream filters recognised by
 * the pdfname_actions table. handle_pdfname() uses it to decide whether a
 * name seen in filter context is an unknown filter and whether a matched
 * name belongs in the object's filter list.
 */
#define KNOWN_FILTERS              \
    ((1 << OBJ_FILTER_AH) |        \
     (1 << OBJ_FILTER_RL) |        \
     (1 << OBJ_FILTER_A85) |       \
     (1 << OBJ_FILTER_FLATE) |     \
     (1 << OBJ_FILTER_LZW) |       \
     (1 << OBJ_FILTER_FAX) |       \
     (1 << OBJ_FILTER_DCT) |       \
     (1 << OBJ_FILTER_JPX) |       \
     (1 << OBJ_FILTER_CRYPT))
2010-07-30 14:23:10 +03:00
2014-04-07 16:39:54 -04:00
static void handle_pdfname ( struct pdf_struct * pdf , struct pdf_obj * obj , const char * pdfname , int escapes , enum objstate * state )
2010-05-10 23:41:34 +03:00
{
struct pdfname_action * act = NULL ;
unsigned j ;
2014-04-07 16:39:54 -04:00
2014-06-10 22:13:12 -04:00
obj - > statsflags | = OBJ_FLAG_PDFNAME_DONE ;
2018-12-03 12:40:13 -05:00
for ( j = 0 ; j < sizeof ( pdfname_actions ) / sizeof ( pdfname_actions [ 0 ] ) ; j + + ) {
2014-04-07 16:39:54 -04:00
if ( ! strcmp ( pdfname , pdfname_actions [ j ] . pdfname ) ) {
act = & pdfname_actions [ j ] ;
break ;
}
2010-05-10 23:41:34 +03:00
}
2014-04-07 16:39:54 -04:00
2010-07-30 14:23:10 +03:00
if ( ! act ) {
2014-04-07 16:39:54 -04:00
/* these are digital signature objects, filter doesn't matter,
* we don ' t need them anyway */
if ( * state = = STATE_FILTER & & ! ( obj - > flags & ( 1 < < OBJ_SIGNED ) ) & & ! ( obj - > flags & KNOWN_FILTERS ) ) {
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " handle_pdfname: unknown filter %s \n " , pdfname ) ;
2014-04-07 16:39:54 -04:00
obj - > flags | = 1 < < OBJ_FILTER_UNKNOWN ;
}
return ;
2010-07-30 14:23:10 +03:00
}
2014-04-07 16:39:54 -04:00
2016-03-28 13:16:17 -04:00
/* record filter order */
2017-10-30 17:33:19 -04:00
if ( obj - > numfilters < PDF_FILTERLIST_MAX & & ( * state = = STATE_FILTER ) & & ( ( 1 < < act - > set_objflag ) & KNOWN_FILTERS ) )
2016-03-28 13:16:17 -04:00
obj - > filterlist [ obj - > numfilters + + ] = act - > set_objflag ;
2014-07-30 14:20:45 -04:00
if ( ( act - > nameflags & NAMEFLAG_HEURISTIC ) & & escapes ) {
/* if a commonly used PDF name is escaped that is certainly
suspicious . */
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " handle_pdfname: pdfname %s is escaped \n " , pdfname ) ;
2014-07-30 14:20:45 -04:00
pdfobj_flag ( pdf , obj , ESCAPED_COMMON_PDFNAME ) ;
}
2014-07-08 19:53:41 -04:00
# if HAVE_JSON
2014-04-16 14:23:16 -04:00
if ( ( act - > pdf_stats_cb ) )
act - > pdf_stats_cb ( pdf , obj , act ) ;
2014-07-08 19:53:41 -04:00
# endif
2014-04-16 14:23:16 -04:00
2014-04-07 16:39:54 -04:00
if ( act - > from_state = = * state | | act - > from_state = = STATE_ANY ) {
* state = act - > to_state ;
2017-10-30 17:33:19 -04:00
if ( * state = = STATE_FILTER & & act - > set_objflag ! = OBJ_DICT & & ( obj - > flags & ( 1 < < act - > set_objflag ) ) ) {
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " handle_pdfname: duplicate stream filter %s \n " , pdfname ) ;
2014-04-07 16:39:54 -04:00
pdfobj_flag ( pdf , obj , BAD_STREAM_FILTERS ) ;
}
obj - > flags | = 1 < < act - > set_objflag ;
} else {
/* auto-reset states */
switch ( * state ) {
2018-12-03 12:40:13 -05:00
case STATE_S :
* state = STATE_NONE ;
break ;
default :
break ;
2014-04-07 16:39:54 -04:00
}
2010-05-10 23:41:34 +03:00
}
}
2011-12-15 13:27:31 +02:00
static void pdf_parse_encrypt ( struct pdf_struct * pdf , const char * enc , int len )
{
const char * q , * q2 ;
2018-06-02 20:58:35 -04:00
unsigned long objid ;
unsigned long genid ;
2019-01-22 14:15:46 -05:00
long temp_long ;
2011-12-15 13:27:31 +02:00
if ( len > = 16 & & ! strncmp ( enc , " /EncryptMetadata " , 16 ) ) {
2018-12-03 12:40:13 -05:00
q = cli_memstr ( enc + 16 , len - 16 , " /Encrypt " , 8 ) ;
2014-04-07 16:39:54 -04:00
if ( ! q )
return ;
len - = q - enc ;
enc = q ;
2011-12-15 13:27:31 +02:00
}
2014-04-07 16:39:54 -04:00
2011-12-15 13:27:31 +02:00
q = enc + 8 ;
len - = 8 ;
q2 = pdf_nextobject ( q , len ) ;
if ( ! q2 | | ! isdigit ( * q2 ) )
2014-04-07 16:39:54 -04:00
return ;
2011-12-15 13:27:31 +02:00
len - = q2 - q ;
q = q2 ;
2018-06-01 14:23:25 -04:00
2019-01-22 14:15:46 -05:00
if ( CL_SUCCESS ! = cli_strntol_wrap ( q2 , ( size_t ) len , 0 , 10 , & temp_long ) ) {
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_parse_encrypt: Found Encrypt dictionary but failed to parse objid \n " ) ;
2018-06-02 20:58:35 -04:00
return ;
2019-01-22 14:15:46 -05:00
} else if ( temp_long < 0 ) {
cli_dbgmsg ( " pdf_parse_encrypt: Encountered invalid negative objid (%ld). \n " , temp_long ) ;
return ;
2018-06-02 20:58:35 -04:00
}
2019-01-22 14:15:46 -05:00
objid = ( unsigned long ) temp_long ;
2018-06-02 20:58:35 -04:00
objid = objid < < 8 ;
2018-12-03 12:40:13 -05:00
q2 = pdf_nextobject ( q , len ) ;
2011-12-15 13:27:31 +02:00
if ( ! q2 | | ! isdigit ( * q2 ) )
2014-04-07 16:39:54 -04:00
return ;
2011-12-15 13:27:31 +02:00
len - = q2 - q ;
q = q2 ;
2018-06-01 14:23:25 -04:00
2019-01-22 14:15:46 -05:00
if ( CL_SUCCESS ! = cli_strntol_wrap ( q2 , ( size_t ) len , 0 , 10 , & temp_long ) ) {
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_parse_encrypt: Found Encrypt dictionary but failed to parse genid \n " ) ;
2018-06-02 20:58:35 -04:00
return ;
2019-01-22 14:15:46 -05:00
} else if ( temp_long < 0 ) {
cli_dbgmsg ( " pdf_parse_encrypt: Encountered invalid negative genid (%ld). \n " , temp_long ) ;
return ;
2018-06-02 20:58:35 -04:00
}
2019-01-22 14:15:46 -05:00
genid = ( unsigned long ) temp_long ;
2018-12-03 12:40:13 -05:00
objid | = genid & 0xff ;
2011-12-15 13:27:31 +02:00
q2 = pdf_nextobject ( q , len ) ;
if ( ! q2 | | * q2 ! = ' R ' )
2014-04-07 16:39:54 -04:00
return ;
2018-12-03 12:40:13 -05:00
cli_dbgmsg ( " pdf_parse_encrypt: Encrypt dictionary in obj %lu %lu \n " , objid > > 8 , objid & 0xff ) ;
2014-04-07 16:39:54 -04:00
2011-12-15 13:27:31 +02:00
pdf - > enc_objid = objid ;
}
static void pdf_parse_trailer ( struct pdf_struct * pdf , const char * s , long length )
{
const char * enc ;
2014-04-07 16:39:54 -04:00
2011-12-15 13:27:31 +02:00
enc = cli_memstr ( s , length , " /Encrypt " , 8 ) ;
if ( enc ) {
2014-04-07 16:39:54 -04:00
char * newID ;
pdf - > flags | = 1 < < ENCRYPTED_PDF ;
pdf_parse_encrypt ( pdf , enc , s + length - enc ) ;
newID = pdf_readstring ( s , length , " /ID " , & pdf - > fileIDlen , NULL , 0 ) ;
if ( newID ) {
free ( pdf - > fileID ) ;
pdf - > fileID = newID ;
}
2011-12-15 13:27:31 +02:00
}
}
2014-06-25 13:36:30 -04:00
void pdf_parseobj ( struct pdf_struct * pdf , struct pdf_obj * obj )
2010-05-10 23:41:34 +03:00
{
/* enough to hold common pdf names, we don't need all the names */
char pdfname [ 64 ] ;
2013-01-24 14:43:58 -05:00
const char * q2 , * q3 ;
2017-12-21 14:39:01 -05:00
const char * nextobj = NULL , * nextopen = NULL , * nextclose = NULL ;
2018-12-03 12:40:13 -05:00
const char * q = NULL ;
2018-08-14 14:00:31 -07:00
const char * dict = NULL , * enddict = NULL , * start = NULL ;
2019-03-05 21:15:41 -05:00
off_t dict_length = 0 , full_dict_length = 0 , bytesleft = 0 ;
2018-12-03 12:40:13 -05:00
size_t i = 0 ;
2018-08-14 14:00:31 -07:00
unsigned filters = 0 , blockopens = 0 ;
2010-05-10 23:41:34 +03:00
enum objstate objstate = STATE_NONE ;
2014-06-30 15:43:53 -04:00
# if HAVE_JSON
2018-12-03 12:40:13 -05:00
json_object * pdfobj = NULL , * jsonobj = NULL ;
2014-06-30 15:43:53 -04:00
# endif
2010-05-10 23:41:34 +03:00
2019-01-22 14:15:46 -05:00
if ( NULL = = pdf | | NULL = = obj ) {
cli_warnmsg ( " pdf_parseobj: invalid arguments \n " ) ;
return ;
}
2019-03-05 21:15:41 -05:00
cli_dbgmsg ( " pdf_parseobj: Parsing object %u %u \n " , obj - > id > > 8 , obj - > id & 0xff ) ;
2019-01-22 13:53:29 -05:00
if ( obj - > objstm ) {
if ( ( size_t ) obj - > start > obj - > objstm - > streambuf_len ) {
cli_dbgmsg ( " pdf_parseobj: %u %u obj: obj start (%u) is greater than size of object stream (%zu). \n " ,
2019-01-22 18:04:53 -05:00
obj - > id > > 8 , obj - > id & 0xff , obj - > start , obj - > objstm - > streambuf_len ) ;
2019-01-22 13:53:29 -05:00
return ;
}
q = ( const char * ) ( obj - > start + obj - > objstm - > streambuf ) ;
} else {
if ( ( size_t ) obj - > start > pdf - > size ) {
cli_dbgmsg ( " pdf_parseobj: %u %u obj: obj start (%u) is greater than size of PDF (%lld). \n " ,
2019-01-22 18:04:53 -05:00
obj - > id > > 8 , obj - > id & 0xff , obj - > start , ( long long ) pdf - > size ) ;
2019-01-22 13:53:29 -05:00
return ;
}
q = ( const char * ) ( obj - > start + pdf - > map ) ;
}
start = q ;
2018-08-14 14:00:31 -07:00
2019-03-05 21:15:41 -05:00
if ( obj - > size < = 0 )
2014-04-07 16:39:54 -04:00
return ;
2019-01-22 13:53:29 -05:00
if ( obj - > objstm ) {
2019-03-05 21:15:41 -05:00
bytesleft = MIN ( obj - > size , obj - > objstm - > streambuf_len - obj - > start ) ;
2019-01-22 13:53:29 -05:00
} else {
2019-03-05 21:15:41 -05:00
bytesleft = MIN ( obj - > size , pdf - > size - obj - > start ) ;
}
/* For objects that aren't already in an object stream^, check if they contain a stream.
* ^ Objects in object streams aren ' t supposed to contain streams , so we don ' t check them . */
if ( NULL = = obj - > objstm ) {
/* Check if object contains stream */
cl_error_t has_stream ;
const char * stream = NULL ;
size_t stream_size = 0 ;
has_stream = find_stream_bounds (
start ,
obj - > size ,
& stream ,
& stream_size ,
( pdf - > enc_method_stream < = ENC_IDENTITY ) & & ( pdf - > enc_method_embeddedfile < = ENC_IDENTITY ) ) ;
if ( ( CL_SUCCESS = = has_stream ) | |
( CL_EFORMAT = = has_stream ) ) {
/* Stream found. Store this fact and the stream bounds. */
cli_dbgmsg ( " pdf_parseobj: %u %u contains stream, size: %zu \n " , obj - > id > > 8 , obj - > id & 0xff , stream_size ) ;
obj - > flags | = ( 1 < < OBJ_STREAM ) ;
obj - > stream = stream ;
obj - > stream_size = stream_size ;
}
2019-01-22 13:53:29 -05:00
}
2013-01-24 14:43:58 -05:00
2010-05-10 23:41:34 +03:00
/* find start of dictionary */
do {
2014-04-07 16:39:54 -04:00
nextobj = pdf_nextobject ( q , bytesleft ) ;
2018-12-03 12:40:13 -05:00
bytesleft - = nextobj - q ;
2014-04-07 16:39:54 -04:00
if ( ! nextobj | | bytesleft < 0 ) {
2018-12-03 12:40:13 -05:00
cli_dbgmsg ( " pdf_parseobj: %u %u obj: no dictionary \n " , obj - > id > > 8 , obj - > id & 0xff ) ;
2014-06-30 15:43:53 -04:00
# if HAVE_JSON
if ( ! ( pdfobj ) & & pdf - > ctx - > wrkproperty ! = NULL ) {
pdfobj = cli_jsonobj ( pdf - > ctx - > wrkproperty , " PDFStats " ) ;
if ( ! ( pdfobj ) )
return ;
}
if ( pdfobj ) {
if ( ! ( jsonobj ) )
jsonobj = cli_jsonarray ( pdfobj , " ObjectsWithoutDictionaries " ) ;
if ( jsonobj )
2018-12-03 12:40:13 -05:00
cli_jsonint_array ( jsonobj , obj - > id > > 8 ) ;
2014-06-30 15:43:53 -04:00
}
# endif
2014-04-07 16:39:54 -04:00
return ;
}
2019-01-22 14:15:46 -05:00
/*
* Opening ` < ` for object ' s dictionary may be back 1 character ,
* provided q is not at the start of the buffer ( it shouldn ' t be ) .
*/
if ( obj - > objstm ) {
if ( obj - > objstm - > streambuf = = q ) {
q3 = memchr ( q , ' < ' , nextobj - q ) ;
} else {
q3 = memchr ( q - 1 , ' < ' , nextobj - q + 1 ) ;
}
} else {
if ( pdf - > map = = q ) {
q3 = memchr ( q , ' < ' , nextobj - q ) ;
} else {
q3 = memchr ( q - 1 , ' < ' , nextobj - q + 1 ) ;
}
}
2014-04-07 16:39:54 -04:00
nextobj + + ;
bytesleft - - ;
q = nextobj ;
2010-05-10 23:41:34 +03:00
} while ( ! q3 | | q3 [ 1 ] ! = ' < ' ) ;
2018-12-03 12:40:13 -05:00
dict = q3 + 2 ;
q = dict ;
2013-01-24 14:43:58 -05:00
blockopens + + ;
2019-03-05 21:15:41 -05:00
bytesleft = obj - > size - ( q - start ) ;
2018-12-03 12:40:13 -05:00
enddict = q + bytesleft - 1 ;
2013-01-24 14:43:58 -05:00
/* find end of dictionary block */
2013-02-12 10:24:01 -05:00
if ( bytesleft < 0 ) {
2018-12-03 12:40:13 -05:00
cli_dbgmsg ( " pdf_parseobj: %u %u obj: broken dictionary \n " , obj - > id > > 8 , obj - > id & 0xff ) ;
2014-06-30 15:43:53 -04:00
# if HAVE_JSON
if ( ! ( pdfobj ) & & pdf - > ctx - > wrkproperty ! = NULL ) {
pdfobj = cli_jsonobj ( pdf - > ctx - > wrkproperty , " PDFStats " ) ;
if ( ! ( pdfobj ) )
return ;
}
if ( pdfobj ) {
if ( ! ( jsonobj ) )
jsonobj = cli_jsonarray ( pdfobj , " ObjectsWithBrokenDictionaries " ) ;
if ( jsonobj )
2018-12-03 12:40:13 -05:00
cli_jsonint_array ( jsonobj , obj - > id > > 8 ) ;
2014-06-30 15:43:53 -04:00
}
# endif
2013-02-12 10:24:01 -05:00
return ;
}
2013-01-24 14:43:58 -05:00
2013-02-12 10:24:01 -05:00
/* while still looking ... */
2018-12-03 12:40:13 -05:00
while ( ( q < enddict - 1 ) & & ( blockopens > 0 ) ) {
2013-02-12 10:24:01 -05:00
/* find next close */
2018-12-03 12:40:13 -05:00
nextclose = memchr ( q , ' > ' , enddict - q ) ;
2013-02-12 10:24:01 -05:00
if ( nextclose & & ( nextclose [ 1 ] = = ' > ' ) ) {
/* check for nested open */
2018-12-03 12:40:13 -05:00
while ( ( nextopen = memchr ( q - 1 , ' < ' , nextclose - q + 1 ) ) ! = NULL ) {
2013-02-12 10:24:01 -05:00
if ( nextopen [ 1 ] = = ' < ' ) {
/* nested open */
blockopens + + ;
q = nextopen + 2 ;
2018-12-03 12:40:13 -05:00
} else {
2013-02-12 10:24:01 -05:00
/* unmatched < before next close */
q = nextopen + 2 ;
2013-01-24 14:43:58 -05:00
}
}
2013-02-12 10:24:01 -05:00
/* close block */
blockopens - - ;
q = nextclose + 2 ;
2018-12-03 12:40:13 -05:00
} else if ( nextclose ) {
2013-02-12 10:24:01 -05:00
/* found one > but not two */
q = nextclose + 2 ;
2018-12-03 12:40:13 -05:00
} else {
2013-02-12 10:24:01 -05:00
/* next closing not found */
2013-03-15 11:29:25 -04:00
break ;
2013-02-12 10:24:01 -05:00
}
}
2013-01-24 14:43:58 -05:00
2013-02-12 10:24:01 -05:00
/* Was end of dictionary found? */
2013-03-15 11:29:25 -04:00
if ( blockopens ) {
/* probably truncated */
2018-12-03 12:40:13 -05:00
cli_dbgmsg ( " pdf_parseobj: %u %u obj broken dictionary \n " , obj - > id > > 8 , obj - > id & 0xff ) ;
2014-06-30 15:43:53 -04:00
# if HAVE_JSON
if ( ! ( pdfobj ) & & pdf - > ctx - > wrkproperty ! = NULL ) {
pdfobj = cli_jsonobj ( pdf - > ctx - > wrkproperty , " PDFStats " ) ;
if ( ! ( pdfobj ) )
return ;
}
if ( pdfobj ) {
if ( ! ( jsonobj ) )
jsonobj = cli_jsonarray ( pdfobj , " ObjectsWithBrokenDictionaries " ) ;
if ( jsonobj )
2018-12-03 12:40:13 -05:00
cli_jsonint_array ( jsonobj , obj - > id > > 8 ) ;
2014-06-30 15:43:53 -04:00
}
# endif
2013-02-12 10:24:01 -05:00
return ;
2013-03-15 11:29:25 -04:00
}
2014-04-07 16:39:54 -04:00
2013-01-24 14:43:58 -05:00
enddict = nextclose ;
2010-05-10 23:41:34 +03:00
obj - > flags | = 1 < < OBJ_DICT ;
2013-01-24 14:43:58 -05:00
full_dict_length = dict_length = enddict - dict ;
/* This code prints the dictionary content.
{
char * dictionary = malloc ( dict_length + 1 ) ;
if ( dictionary ) {
2013-02-12 10:24:01 -05:00
for ( i = 0 ; i < dict_length ; i + + ) {
2016-04-01 15:20:36 -04:00
if ( dict [ i ] = = ' \r ' )
dictionary [ i ] = ' \n ' ;
else if ( isprint ( dict [ i ] ) | | isspace ( dict [ i ] ) )
2013-02-12 10:24:01 -05:00
dictionary [ i ] = dict [ i ] ;
else
dictionary [ i ] = ' * ' ;
}
2013-01-24 14:43:58 -05:00
dictionary [ dict_length ] = ' \0 ' ;
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_parseobj: dictionary is <<%s>> \n " , dictionary ) ;
2013-01-24 14:43:58 -05:00
free ( dictionary ) ;
}
}
*/
2010-05-10 23:41:34 +03:00
2010-05-11 13:23:20 +03:00
/* process pdf names */
2018-12-03 12:40:13 -05:00
for ( q = dict ; dict_length > 0 ; ) {
int escapes = 0 , breakout = 0 ;
2014-04-07 16:39:54 -04:00
q2 = memchr ( q , ' / ' , dict_length ) ;
if ( ! q2 )
break ;
dict_length - = q2 - q ;
q = q2 ;
/* normalize PDF names */
2018-12-03 12:40:13 -05:00
for ( i = 0 ; dict_length > 0 & & ( i < sizeof ( pdfname ) - 1 ) ; i + + ) {
2014-04-07 16:39:54 -04:00
q + + ;
dict_length - - ;
if ( * q = = ' # ' ) {
2018-12-03 12:40:13 -05:00
if ( cli_hex2str_to ( q + 1 , pdfname + i , 2 ) = = - 1 )
2014-04-07 16:39:54 -04:00
break ;
q + = 2 ;
dict_length - = 2 ;
escapes = 1 ;
continue ;
}
switch ( * q ) {
2018-12-03 12:40:13 -05:00
case ' ' :
case ' \t ' :
case ' \r ' :
case ' \n ' :
case ' / ' :
case ' > ' :
case ' [ ' :
case ' ] ' :
case ' < ' :
case ' ( ' :
breakout = 1 ;
2014-04-07 16:39:54 -04:00
}
if ( breakout )
break ;
pdfname [ i ] = * q ;
}
pdfname [ i ] = ' \0 ' ;
handle_pdfname ( pdf , obj , pdfname , escapes , & objstate ) ;
if ( objstate = = STATE_LINEARIZED ) {
long trailer_end , trailer ;
pdfobj_flag ( pdf , obj , LINEARIZED_PDF ) ;
2018-12-03 12:40:13 -05:00
objstate = STATE_NONE ;
2014-04-07 16:39:54 -04:00
trailer_end = pdf_readint ( dict , full_dict_length , " /H " ) ;
2019-03-05 21:15:41 -05:00
if ( ( trailer_end > 0 ) & & ( ( size_t ) trailer_end < pdf - > size ) ) {
2014-04-07 16:39:54 -04:00
trailer = trailer_end - 1024 ;
if ( trailer < 0 )
trailer = 0 ;
q2 = pdf - > map + trailer ;
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_parseobj: looking for trailer in linearized pdf: %ld - %ld \n " , trailer , trailer_end ) ;
2014-04-07 16:39:54 -04:00
pdf_parse_trailer ( pdf , q2 , trailer_end - trailer ) ;
if ( pdf - > fileID )
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_parseobj: found fileID \n " ) ;
2014-04-07 16:39:54 -04:00
}
}
if ( objstate = = STATE_LAUNCHACTION )
pdfobj_flag ( pdf , obj , HAS_LAUNCHACTION ) ;
if ( dict_length > 0 & & ( objstate = = STATE_JAVASCRIPT | | objstate = = STATE_OPENACTION | | objstate = = STATE_CONTENTS ) ) {
2018-03-08 12:21:16 -05:00
off_t dict_remaining = dict_length ;
2014-04-07 16:39:54 -04:00
if ( objstate = = STATE_OPENACTION )
pdfobj_flag ( pdf , obj , HAS_OPENACTION ) ;
2018-03-08 12:21:16 -05:00
q2 = pdf_nextobject ( q , dict_remaining ) ;
2014-04-07 16:39:54 -04:00
if ( q2 & & isdigit ( * q2 ) ) {
2018-12-03 12:40:13 -05:00
const char * q2_old = NULL ;
2018-06-02 20:58:35 -04:00
unsigned long objid ;
unsigned long genid ;
2019-01-22 14:15:46 -05:00
long temp_long ;
2018-06-02 20:58:35 -04:00
2018-03-08 12:21:16 -05:00
dict_remaining - = ( off_t ) ( q2 - q ) ;
2019-01-22 14:15:46 -05:00
if ( CL_SUCCESS ! = cli_strntol_wrap ( q2 , ( size_t ) dict_remaining , 0 , 10 , & temp_long ) ) {
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_parseobj: failed to parse object objid \n " ) ;
2018-06-02 20:58:35 -04:00
return ;
2019-01-22 14:15:46 -05:00
} else if ( temp_long < 0 ) {
cli_dbgmsg ( " pdf_parseobj: Encountered invalid negative genid (%ld). \n " , temp_long ) ;
return ;
2018-06-02 20:58:35 -04:00
}
2019-01-22 14:15:46 -05:00
objid = ( unsigned long ) temp_long ;
2018-06-02 20:58:35 -04:00
objid = objid < < 8 ;
2019-01-22 18:04:53 -05:00
while ( ( dict_remaining > 0 ) & & isdigit ( * q2 ) ) {
2014-04-07 16:39:54 -04:00
q2 + + ;
2019-01-22 18:04:53 -05:00
dict_remaining - - ;
}
2014-04-07 16:39:54 -04:00
2018-03-08 12:21:16 -05:00
q2_old = q2 ;
2018-12-03 12:40:13 -05:00
q2 = pdf_nextobject ( q2 , dict_remaining ) ;
2014-04-07 16:39:54 -04:00
if ( q2 & & isdigit ( * q2 ) ) {
2018-03-08 12:21:16 -05:00
dict_remaining - = ( off_t ) ( q2 - q2_old ) ;
2019-01-22 14:15:46 -05:00
if ( CL_SUCCESS ! = cli_strntol_wrap ( q2 , ( size_t ) dict_remaining , 0 , 10 , & temp_long ) ) {
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_parseobj: failed to parse object genid \n " ) ;
2018-06-02 20:58:35 -04:00
return ;
2019-01-22 14:15:46 -05:00
} else if ( temp_long < 0 ) {
cli_dbgmsg ( " pdf_parseobj: Encountered invalid negative genid (%ld). \n " , temp_long ) ;
return ;
2018-06-02 20:58:35 -04:00
}
2019-01-22 14:15:46 -05:00
genid = ( unsigned long ) temp_long ;
2018-06-02 20:58:35 -04:00
objid | = genid & 0xff ;
2014-04-07 16:39:54 -04:00
2018-03-08 12:21:16 -05:00
q2 = pdf_nextobject ( q2 , dict_remaining ) ;
2014-04-07 16:39:54 -04:00
if ( q2 & & * q2 = = ' R ' ) {
struct pdf_obj * obj2 ;
2018-12-03 12:40:13 -05:00
cli_dbgmsg ( " pdf_parseobj: found %s stored in indirect object %lu %lu \n " , pdfname , objid > > 8 , objid & 0xff ) ;
2014-04-07 16:39:54 -04:00
obj2 = find_obj ( pdf , obj , objid ) ;
if ( obj2 ) {
enum pdf_objflags flag =
2018-12-03 12:40:13 -05:00
objstate = = STATE_JAVASCRIPT ? OBJ_JAVASCRIPT : objstate = = STATE_OPENACTION ? OBJ_OPENACTION :
2014-04-07 16:39:54 -04:00
2018-12-03 12:40:13 -05:00
OBJ_CONTENTS ;
2014-04-07 16:39:54 -04:00
obj2 - > flags | = 1 < < flag ;
obj - > flags & = ~ ( 1 < < flag ) ;
} else {
pdfobj_flag ( pdf , obj , BAD_INDOBJ ) ;
}
}
}
}
objstate = STATE_NONE ;
}
2010-05-10 23:41:34 +03:00
}
2014-04-07 16:39:54 -04:00
2018-12-03 12:40:13 -05:00
for ( i = 0 ; i < sizeof ( pdfname_actions ) / sizeof ( pdfname_actions [ 0 ] ) ; i + + ) {
2014-04-07 16:39:54 -04:00
const struct pdfname_action * act = & pdfname_actions [ i ] ;
if ( ( obj - > flags & ( 1 < < act - > set_objflag ) ) & &
act - > from_state = = STATE_FILTER & &
act - > to_state = = STATE_FILTER & &
act - > set_objflag ! = OBJ_FILTER_CRYPT & &
act - > set_objflag ! = OBJ_FILTER_STANDARD ) {
filters + + ;
}
2010-08-01 22:14:44 +03:00
}
2014-04-07 16:39:54 -04:00
if ( filters > 2 ) {
/* more than 2 non-crypt filters */
pdfobj_flag ( pdf , obj , MANY_FILTERS ) ;
2010-08-01 22:14:44 +03:00
}
2014-04-07 16:39:54 -04:00
2010-07-30 20:26:59 +03:00
if ( obj - > flags & ( ( 1 < < OBJ_SIGNED ) | KNOWN_FILTERS ) )
2014-04-07 16:39:54 -04:00
obj - > flags & = ~ ( 1 < < OBJ_FILTER_UNKNOWN ) ;
2010-07-30 20:26:59 +03:00
if ( obj - > flags & ( 1 < < OBJ_FILTER_UNKNOWN ) )
2014-04-07 16:39:54 -04:00
pdfobj_flag ( pdf , obj , UNKNOWN_FILTER ) ;
2018-12-03 12:40:13 -05:00
cli_dbgmsg ( " pdf_parseobj: %u %u obj flags: %02x \n " , obj - > id > > 8 , obj - > id & 0xff , obj - > flags ) ;
2010-05-10 23:41:34 +03:00
}
2018-06-08 10:13:13 -07:00
/**
 * @brief Locate a key inside a dictionary buffer and return a pointer to its value.
 *
 * @param q0            Pointer to the first byte of the dictionary data.
 * @param[in,out] len   In:  number of bytes available starting at q0.
 *                      Out: number of bytes from the returned value pointer
 *                           to the end of the dictionary data. Note that the
 *                           count is already reduced by the offset of the key
 *                           when the key is found but no value follows it.
 * @param key           NUL-terminated key to search for.
 * @return const char*  Start of the key's value, or NULL when the length is
 *                      not positive, q0 is NULL, the key is not present, or
 *                      no object follows the key.
 */
static const char *pdf_getdict(const char *q0, int *len, const char *key)
{
    const char *key_pos;
    const char *value;

    if (*len <= 0) {
        cli_dbgmsg("pdf_getdict: bad length %d\n", *len);
        return NULL;
    }

    if (NULL == q0)
        return NULL;

    /* locate the key */
    key_pos = cli_memstr(q0, *len, key, strlen(key));
    if (NULL == key_pos) {
        cli_dbgmsg("pdf_getdict: %s not found in dict\n", key);
        return NULL;
    }

    /* discount the bytes that precede the key */
    *len -= key_pos - q0;

    /* the value is the next object following the key */
    value = pdf_nextobject(key_pos + 1, *len - 1);
    if (NULL == value) {
        cli_dbgmsg("pdf_getdict: %s is invalid in dict\n", key);
        return NULL;
    }

    /* when the byte just before the value is '<', step back so the value keeps that bracket */
    if ('<' == value[-1])
        value--;

    /* discount the bytes between the key and its value */
    *len -= value - key_pos;

    return value;
}
2012-01-18 20:58:38 +02:00
/**
 * @brief Extract the string value stored under a dictionary key.
 *
 * Handles literal strings "(...)" and hex strings "<...>". The result is
 * returned in a freshly allocated, NUL-terminated buffer.
 *
 * @param q0        Start of the dictionary data.
 * @param len       Number of bytes available at q0.
 * @param key       NUL-terminated key to look up.
 * @param[out] slen Optional. Set to 0 on entry and to the decoded length
 *                  (terminator not counted) on success.
 * @param[out] qend Optional. Set to q0 on entry; once the end of the string
 *                  has been located it is set to the byte following the
 *                  closing ')' of a literal string, or to the closing '>' of
 *                  a hex string.
 * @param noescape  Non-zero: copy a literal string verbatim, leaving
 *                  backslash escapes undecoded.
 * @return char*    Allocated string (release with free()), or NULL on failure.
 */
static char *pdf_readstring(const char *q0, int len, const char *key, unsigned *slen, const char **qend, int noescape)
{
    char *s, *s0;
    const char *start, *q, *end;

    if (slen)
        *slen = 0;

    if (qend)
        *qend = q0;

    q = pdf_getdict(q0, &len, key);
    if (!q || len <= 0)
        return NULL;

    if (*q == '(') {
        int paren = 1;

        start = ++q;
        len--;

        /* Find the matching ')' while honouring nesting and skipping escaped bytes. */
        for (; paren > 0 && len > 0; q++, len--) {
            switch (*q) {
                case '(':
                    paren++;
                    break;
                case ')':
                    paren--;
                    break;
                case '\\':
                    q++;
                    len--;
                    break;
                default:
                    break;
            }
        }

        /* Running out of data (even exactly at the closing paren) is treated as truncation. */
        if (len <= 0) {
            cli_errmsg("pdf_readstring: Invalid, truncated dictionary.\n");
            return NULL;
        }

        if (qend)
            *qend = q;

        /* q is one past the closing ')'; step back so [start, q) is the string body. */
        q--;
        len = q - start;
        s0 = s = cli_malloc(len + 1);
        if (!s) {
            cli_errmsg("pdf_readstring: Unable to allocate buffer\n");
            return NULL;
        }

        end = start + len;
        if (noescape) {
            memcpy(s0, start, len);
            s = s0 + len;
        } else {
            /* Every escape consumes at least as many input bytes as it emits,
             * so the output never exceeds the len + 1 bytes allocated above. */
            for (q = start; q < end; q++) {
                if (*q != '\\') {
                    *s++ = *q;
                } else {
                    q++;
                    switch (*q) {
                        case 'n':
                            *s++ = '\n';
                            break;
                        case 'r':
                            *s++ = '\r';
                            break;
                        case 't':
                            *s++ = '\t';
                            break;
                        case 'b':
                            *s++ = '\b';
                            break;
                        case 'f':
                            *s++ = '\f';
                            break;
                        case '(': /* fall-through */
                        case ')': /* fall-through */
                        case '\\':
                            *s++ = *q;
                            break;
                        case '\n':
                            /* line continuation: ignore */
                            break;
                        case '\r':
                            /* line continuation: ignore, together with a following LF */
                            if (q + 1 < end && q[1] == '\n')
                                q++;
                            break;
                        case '0':
                        case '1':
                        case '2':
                        case '3':
                        case '4':
                        case '5':
                        case '6':
                        case '7': {
                            /* Octal escape: one to three octal digits, never
                             * reading at or beyond `end`. High-order overflow
                             * is discarded by the 0xff mask. */
                            unsigned int octal = 0;
                            int ndigits        = 0;

                            while (ndigits < 3 && q < end && *q >= '0' && *q <= '7') {
                                octal = (octal << 3) | (unsigned int)(*q - '0');
                                q++;
                                ndigits++;
                            }

                            /* Leave q on the last digit; the for-loop steps past it. */
                            q--;
                            *s++ = (char)(octal & 0xff);
                            break;
                        }
                        default:
                            /* Unknown escape (this includes '8' and '9', which
                             * are not octal digits): keep the backslash and
                             * let the loop re-read the following byte as an
                             * ordinary character. */
                            *s++ = '\\';
                            q--;
                            break;
                    }
                }
            }
        }

        *s++ = '\0';
        if (slen)
            *slen = s - s0 - 1;

        return s0;
    }

    if ((*q == '<') && (len >= 3)) {
        start = ++q;
        len -= 1;

        /* The search for the closing '>' begins one byte past `start`. */
        q = memchr(q + 1, '>', len - 1);
        if (!q)
            return NULL;

        if (qend)
            *qend = q;

        s = cli_malloc((q - start) / 2 + 1);
        if (s == NULL) { /* oops, couldn't allocate memory */
            cli_dbgmsg("pdf_readstring: unable to allocate memory...\n");
            return NULL;
        }

        if (cli_hex2str_to(start, s, q - start)) {
            cli_dbgmsg("pdf_readstring: %s has bad hex value\n", key);
            free(s);
            return NULL;
        }

        s[(q - start) / 2] = '\0';
        if (slen)
            *slen = (q - start) / 2;

        return s;
    }

    cli_dbgmsg("pdf_readstring: %s is invalid string in dict\n", key);
    return NULL;
}
2011-12-14 15:43:14 +02:00
/**
 * @brief Read the name value ("/Name") stored under a dictionary key.
 *
 * The returned copy starts after the leading '/' of the value and runs up to
 * the next '/', a ">>" sequence, a NUL byte or the end of the buffer, with
 * trailing whitespace removed.
 *
 * @param q     Start of the dictionary data.
 * @param len   Number of bytes available at q.
 * @param key   NUL-terminated key to look up.
 * @return char* Newly allocated NUL-terminated copy of the name (release with
 *               free()), or NULL if the key is missing, its value does not
 *               start with '/', or allocation fails.
 */
static char *pdf_readval(const char *q, int len, const char *key)
{
    const char *end;
    char *s;
    int origlen = len;

    q = pdf_getdict(q, &len, key);
    if (!q || len <= 0)
        return NULL;

    /* skip blanks in front of the value */
    while (len > 0 && *q && *q == ' ') {
        q++;
        len--;
    }

    /* The skip loop may have consumed the whole buffer: check the remaining
     * length before dereferencing q so we never read past the end. */
    if (len <= 0 || *q != '/')
        return NULL;

    q++;
    len--;
    end = q;

    /* advance to the next name, the end of the dictionary or the end of data */
    while (len > 0 && *end && !(*end == '/' || (len > 1 && end[0] == '>' && end[1] == '>'))) {
        end++;
        len--;
    }

    /* end-of-buffer whitespace trimming; the cast keeps isspace() defined for
     * bytes >= 0x80 on platforms where plain char is signed */
    while (end > q && len < origlen && isspace((unsigned char)*(end - 1))) {
        end--;
        len++;
    }

    s = cli_malloc(end - q + 1);
    if (!s)
        return NULL;

    memcpy(s, q, end - q);
    s[end - q] = '\0';

    return s;
}
2011-05-07 18:06:06 +03:00
static int pdf_readint ( const char * q0 , int len , const char * key )
{
2018-12-03 12:40:13 -05:00
long value = 0 ;
const char * q = pdf_getdict ( q0 , & len , key ) ;
2014-04-07 16:39:54 -04:00
2018-06-02 20:58:35 -04:00
if ( q = = NULL ) {
value = - 1 ;
2018-12-03 12:40:13 -05:00
} else if ( CL_SUCCESS ! = cli_strntol_wrap ( q , ( size_t ) len , 0 , 10 , & value ) ) {
2018-06-02 20:58:35 -04:00
value = - 1 ;
}
return value ;
2011-05-07 18:06:06 +03:00
}
/**
 * @brief Read the boolean value ("true" / "false") stored under a dictionary key.
 *
 * @param q0       Start of the dictionary data.
 * @param len      Number of bytes available at q0.
 * @param key      NUL-terminated key to look up.
 * @param Default  Value returned when the key is missing, fewer than 5 bytes
 *                 remain at the value, or the value is neither keyword.
 * @return int     1 for "true", 0 for "false", otherwise Default.
 */
static int pdf_readbool(const char *q0, int len, const char *key, int Default)
{
    const char *value = pdf_getdict(q0, &len, key);
    int result        = Default;

    if (value != NULL && len >= 5) {
        if (0 == strncmp(value, "true", 4)) {
            result = 1;
        } else if (0 == strncmp(value, "false", 5)) {
            result = 0;
        } else {
            cli_dbgmsg("pdf_readbool: invalid value for %s bool\n", key);
        }
    }

    return result;
}
/*
 * 32-byte padding string used when deriving the encryption key (see the
 * "7.6.3.3 Algorithm 2" code in check_user_password(), where an empty
 * password is represented by this padding). The data contains embedded NUL
 * bytes, so it must always be copied/compared with an explicit length of 32
 * and never with the str*() functions. The literal must contain exactly these
 * bytes: no whitespace between the escapes.
 */
static const char *key_padding =
    "\x28\xBF\x4E\x5E\x4E\x75\x8A\x41"
    "\x64\x00\x4E\x56\xFF\xFA\x01\x08"
    "\x2E\x2E\x00\xB6\xD0\x68\x3E\x80"
    "\x2F\x0C\xA9\xFE\x64\x53\x69\x7A";
2011-05-07 18:06:06 +03:00
/**
 * @brief Emit a debug message containing a hex dump of a buffer.
 *
 * Does nothing unless cli_debug_flag is set.
 *
 * @param msg  Label printed in front of the hex dump.
 * @param hex  Buffer to dump.
 * @param len  Number of bytes of the buffer to dump.
 */
static void dbg_printhex(const char *msg, const char *hex, unsigned len)
{
    char *kh;

    if (!cli_debug_flag)
        return;

    /* cli_str2hex() hands back a heap buffer (released below). If it could
     * not produce one, skip the message rather than pass NULL to "%s". */
    kh = cli_str2hex(hex, len);
    if (NULL == kh)
        return;

    cli_dbgmsg("cli_pdf: %s: %s\n", msg, kh);
    free(kh);
}
static void check_user_password ( struct pdf_struct * pdf , int R , const char * O ,
2018-12-03 12:40:13 -05:00
const char * U , int32_t P , int EM ,
const char * UE ,
unsigned length , unsigned oulen )
2011-05-07 18:06:06 +03:00
{
unsigned i ;
uint8_t result [ 16 ] ;
char data [ 32 ] ;
struct arc4_state arc4 ;
unsigned password_empty = 0 ;
2014-07-10 18:11:49 -04:00
UNUSEDPARAM ( oulen ) ;
2011-05-07 18:06:06 +03:00
dbg_printhex ( " U: " , U , 32 ) ;
dbg_printhex ( " O: " , O , 32 ) ;
if ( R = = 5 ) {
2014-04-07 16:39:54 -04:00
uint8_t result2 [ 32 ] ;
/* supplement to ISO3200, 3.5.2 Algorithm 3.11 */
/* user validation salt */
2018-12-03 12:40:13 -05:00
cl_sha256 ( U + 32 , 8 , result2 , NULL ) ;
2014-07-10 18:11:49 -04:00
dbg_printhex ( " Computed U " , ( const char * ) result2 , 32 ) ;
2014-04-07 16:39:54 -04:00
if ( ! memcmp ( result2 , U , 32 ) ) {
2017-08-16 17:31:45 -04:00
size_t UE_len ;
2014-04-07 16:39:54 -04:00
/* Algorithm 3.2a could be used to recover encryption key */
password_empty = 1 ;
2018-12-03 12:40:13 -05:00
cl_sha256 ( U + 40 , 8 , result2 , NULL ) ;
2017-08-16 17:31:45 -04:00
UE_len = UE ? strlen ( UE ) : 0 ;
if ( UE_len ! = 32 ) {
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " check_user_password: UE length is not 32: %zu \n " , UE_len ) ;
noisy_warnmsg ( " check_user_password: UE length is not 32: %zu \n " , UE_len ) ;
2014-04-07 16:39:54 -04:00
} else {
pdf - > keylen = 32 ;
2018-12-03 12:40:13 -05:00
pdf - > key = cli_malloc ( 32 ) ;
2014-04-07 16:39:54 -04:00
if ( ! pdf - > key ) {
cli_errmsg ( " check_user_password: Cannot allocate memory for pdf->key \n " ) ;
return ;
}
2017-08-16 17:31:45 -04:00
aes_decrypt ( ( const unsigned char * ) UE , & UE_len , ( unsigned char * ) ( pdf - > key ) , ( char * ) result2 , 32 , 0 ) ;
2018-08-14 14:00:31 -07:00
dbg_printhex ( " check_user_password: Candidate encryption key " , pdf - > key , pdf - > keylen ) ;
2014-04-07 16:39:54 -04:00
}
2013-03-01 13:51:15 -05:00
}
2013-03-12 10:45:44 -04:00
} else if ( ( R > = 2 ) & & ( R < = 4 ) ) {
2014-02-08 00:31:12 -05:00
unsigned char * d ;
size_t sz = 68 + pdf - > fileIDlen + ( R > = 4 & & ! EM ? 4 : 0 ) ;
2018-12-03 12:40:13 -05:00
d = calloc ( 1 , sz ) ;
2014-02-08 00:31:12 -05:00
if ( ! ( d ) )
return ;
memcpy ( d , key_padding , 32 ) ;
2018-12-03 12:40:13 -05:00
memcpy ( d + 32 , O , 32 ) ;
2014-02-08 00:31:12 -05:00
P = le32_to_host ( P ) ;
2018-12-03 12:40:13 -05:00
memcpy ( d + 64 , & P , 4 ) ;
memcpy ( d + 68 , pdf - > fileID , pdf - > fileIDlen ) ;
2014-02-08 00:31:12 -05:00
2014-04-07 16:39:54 -04:00
/* 7.6.3.3 Algorithm 2 */
/* empty password, password == padding */
if ( R > = 4 & & ! EM ) {
uint32_t v = 0xFFFFFFFF ;
2018-12-03 12:40:13 -05:00
memcpy ( d + 68 + pdf - > fileIDlen , & v , 4 ) ;
2014-04-07 16:39:54 -04:00
}
cl_hash_data ( " md5 " , d , sz , result , NULL ) ;
free ( d ) ;
if ( length > 128 )
length = 128 ;
if ( R > = 3 ) {
/* Yes, this really is on purpose */
2018-12-03 12:40:13 -05:00
for ( i = 0 ; i < 50 ; i + + )
cl_hash_data ( " md5 " , result , length / 8 , result , NULL ) ;
2014-04-07 16:39:54 -04:00
}
if ( R = = 2 )
length = 40 ;
pdf - > keylen = length / 8 ;
2018-12-03 12:40:13 -05:00
pdf - > key = cli_malloc ( pdf - > keylen ) ;
2014-04-07 16:39:54 -04:00
if ( ! pdf - > key )
2014-02-08 00:31:12 -05:00
return ;
2014-04-07 16:39:54 -04:00
memcpy ( pdf - > key , result , pdf - > keylen ) ;
2014-07-10 18:11:49 -04:00
dbg_printhex ( " md5 " , ( const char * ) result , 16 ) ;
2014-04-07 16:39:54 -04:00
dbg_printhex ( " Candidate encryption key " , pdf - > key , pdf - > keylen ) ;
/* 7.6.3.3 Algorithm 6 */
if ( R = = 2 ) {
/* 7.6.3.3 Algorithm 4 */
memcpy ( data , key_padding , 32 ) ;
2014-07-10 18:11:49 -04:00
arc4_init ( & arc4 , ( const uint8_t * ) ( pdf - > key ) , pdf - > keylen ) ;
arc4_apply ( & arc4 , ( uint8_t * ) data , 32 ) ;
2014-04-07 16:39:54 -04:00
dbg_printhex ( " computed U (R2) " , data , 32 ) ;
if ( ! memcmp ( data , U , 32 ) )
password_empty = 1 ;
} else if ( R > = 3 ) {
unsigned len = pdf - > keylen ;
unsigned char * d ;
d = calloc ( 1 , 32 + pdf - > fileIDlen ) ;
if ( ! ( d ) )
return ;
/* 7.6.3.3 Algorithm 5 */
memcpy ( d , key_padding , 32 ) ;
2018-12-03 12:40:13 -05:00
memcpy ( d + 32 , pdf - > fileID , pdf - > fileIDlen ) ;
2014-04-07 16:39:54 -04:00
cl_hash_data ( " md5 " , d , 32 + pdf - > fileIDlen , result , NULL ) ;
memcpy ( data , pdf - > key , len ) ;
2014-07-10 18:11:49 -04:00
arc4_init ( & arc4 , ( const uint8_t * ) data , len ) ;
2014-04-07 16:39:54 -04:00
arc4_apply ( & arc4 , result , 16 ) ;
2018-12-03 12:40:13 -05:00
for ( i = 1 ; i < = 19 ; i + + ) {
2014-04-07 16:39:54 -04:00
unsigned j ;
2018-12-03 12:40:13 -05:00
for ( j = 0 ; j < len ; j + + )
2014-04-07 16:39:54 -04:00
data [ j ] = pdf - > key [ j ] ^ i ;
2014-07-10 18:11:49 -04:00
arc4_init ( & arc4 , ( const uint8_t * ) data , len ) ;
2014-04-07 16:39:54 -04:00
arc4_apply ( & arc4 , result , 16 ) ;
}
dbg_printhex ( " fileID " , pdf - > fileID , pdf - > fileIDlen ) ;
2014-07-10 18:11:49 -04:00
dbg_printhex ( " computed U (R>=3) " , ( const char * ) result , 16 ) ;
2014-04-07 16:39:54 -04:00
if ( ! memcmp ( result , U , 16 ) )
password_empty = 1 ;
2014-09-04 15:26:55 -04:00
free ( d ) ;
2014-04-07 16:39:54 -04:00
} else {
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " check_user_password: invalid revision %d \n " , R ) ;
noisy_warnmsg ( " check_user_password: invalid revision %d \n " , R ) ;
2014-04-07 16:39:54 -04:00
}
} else {
/* Supported R is in {2,3,4,5} */
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " check_user_password: R value out of range \n " ) ;
noisy_warnmsg ( " check_user_password: R value out of range \n " ) ;
2014-04-07 16:39:54 -04:00
return ;
2013-03-12 10:45:44 -04:00
}
2014-04-07 16:39:54 -04:00
2011-05-07 18:06:06 +03:00
if ( password_empty ) {
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " check_user_password: user password is empty \n " ) ;
noisy_msg ( pdf , " check_user_password: encrypted PDF found, user password is empty, will attempt to decrypt \n " ) ;
2014-04-07 16:39:54 -04:00
/* The key we computed above is the key used to encrypt the streams.
* We could decrypt it now if we wanted to */
pdf - > flags | = 1 < < DECRYPTABLE_PDF ;
2011-05-07 18:06:06 +03:00
} else {
2014-04-07 16:39:54 -04:00
/* the key is not valid, we would need the user or the owner password to decrypt */
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " check_user_password: user/owner password would be required for decryption \n " ) ;
noisy_warnmsg ( " check_user_password: encrypted PDF found, user password is NOT empty, cannot decrypt! \n " ) ;
2011-05-07 18:06:06 +03:00
}
}
2016-03-31 12:29:16 -04:00
enum enc_method parse_enc_method ( const char * dict , unsigned len , const char * key , enum enc_method def )
2011-12-15 17:24:36 +02:00
{
const char * q ;
2018-12-03 12:40:13 -05:00
char * CFM = NULL ;
2013-08-07 13:50:08 -04:00
enum enc_method ret = ENC_UNKNOWN ;
2014-04-07 16:39:54 -04:00
2011-12-15 17:24:36 +02:00
if ( ! key )
2014-04-07 16:39:54 -04:00
return def ;
2011-12-15 17:24:36 +02:00
if ( ! strcmp ( key , " Identity " ) )
2014-04-07 16:39:54 -04:00
return ENC_IDENTITY ;
2014-07-10 18:11:49 -04:00
q = pdf_getdict ( dict , ( int * ) ( & len ) , key ) ;
2011-12-15 17:24:36 +02:00
if ( ! q )
2014-04-07 16:39:54 -04:00
return def ;
2011-12-15 17:24:36 +02:00
CFM = pdf_readval ( q , len , " /CFM " ) ;
if ( CFM ) {
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " parse_enc_method: %s CFM: %s \n " , key , CFM ) ;
2018-12-03 12:40:13 -05:00
if ( ! strncmp ( CFM , " V2 " , 2 ) )
2014-04-07 16:39:54 -04:00
ret = ENC_V2 ;
2018-12-03 12:40:13 -05:00
else if ( ! strncmp ( CFM , " AESV2 " , 5 ) )
2014-04-07 16:39:54 -04:00
ret = ENC_AESV2 ;
2018-12-03 12:40:13 -05:00
else if ( ! strncmp ( CFM , " AESV3 " , 5 ) )
2014-04-07 16:39:54 -04:00
ret = ENC_AESV3 ;
2018-12-03 12:40:13 -05:00
else if ( ! strncmp ( CFM , " None " , 4 ) )
2014-04-07 16:39:54 -04:00
ret = ENC_NONE ;
free ( CFM ) ;
2011-12-15 17:24:36 +02:00
}
2014-04-07 16:39:54 -04:00
2013-08-07 13:50:08 -04:00
return ret ;
2011-12-15 17:24:36 +02:00
}
2015-03-20 15:10:52 -04:00
void pdf_handle_enc ( struct pdf_struct * pdf )
2011-05-07 18:06:06 +03:00
{
struct pdf_obj * obj ;
2014-07-10 18:11:49 -04:00
uint32_t len , n , R , P , length , EM = 1 , i , oulen ;
2011-12-15 17:24:36 +02:00
char * O , * U , * UE , * StmF , * StrF , * EFF ;
2011-05-07 18:06:06 +03:00
const char * q , * q2 ;
2011-10-08 13:36:12 +03:00
if ( pdf - > enc_objid = = ~ 0u )
2014-04-07 16:39:54 -04:00
return ;
2011-10-08 13:36:12 +03:00
if ( ! pdf - > fileID ) {
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_handle_enc: no file ID \n " ) ;
noisy_warnmsg ( " pdf_handle_enc: no file ID \n " ) ;
2014-04-07 16:39:54 -04:00
return ;
2011-10-08 13:36:12 +03:00
}
2014-04-07 16:39:54 -04:00
2018-08-14 14:00:31 -07:00
obj = find_obj ( pdf , pdf - > objs [ 0 ] , pdf - > enc_objid ) ;
2011-10-08 13:36:12 +03:00
if ( ! obj ) {
2018-12-03 12:40:13 -05:00
cli_dbgmsg ( " pdf_handle_enc: can't find encrypted object %d %d \n " , pdf - > enc_objid > > 8 , pdf - > enc_objid & 0xff ) ;
noisy_warnmsg ( " pdf_handle_enc: can't find encrypted object %d %d \n " , pdf - > enc_objid > > 8 , pdf - > enc_objid & 0xff ) ;
2014-04-07 16:39:54 -04:00
return ;
2011-10-08 13:36:12 +03:00
}
2014-04-07 16:39:54 -04:00
2019-03-05 21:15:41 -05:00
len = obj - > size ;
2018-12-03 12:40:13 -05:00
q = ( obj - > objstm ) ? ( const char * ) ( obj - > start + obj - > objstm - > streambuf )
2018-10-25 13:06:15 -07:00
: ( const char * ) ( obj - > start + pdf - > map ) ;
2011-05-07 18:06:06 +03:00
2011-12-15 17:24:36 +02:00
O = U = UE = StmF = StrF = EFF = NULL ;
2011-05-07 18:06:06 +03:00
do {
2011-12-14 15:43:14 +02:00
2018-12-03 12:40:13 -05:00
pdf - > enc_method_string = ENC_UNKNOWN ;
pdf - > enc_method_stream = ENC_UNKNOWN ;
2014-04-07 16:39:54 -04:00
pdf - > enc_method_embeddedfile = ENC_UNKNOWN ;
2018-12-03 12:40:13 -05:00
P = pdf_readint ( q , len , " /P " ) ;
2014-04-07 16:39:54 -04:00
if ( P = = ~ 0u ) {
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_handle_enc: invalid P \n " ) ;
noisy_warnmsg ( " pdf_handle_enc: invalid P \n " ) ;
2014-04-07 16:39:54 -04:00
break ;
}
2011-05-07 18:06:06 +03:00
2014-04-07 16:39:54 -04:00
q2 = cli_memstr ( q , len , " /Standard " , 9 ) ;
if ( ! q2 ) {
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_handle_enc: /Standard not found \n " ) ;
noisy_warnmsg ( " pdf_handle_enc: /Standard not found \n " ) ;
2014-04-07 16:39:54 -04:00
break ;
}
2011-05-07 18:06:06 +03:00
2014-04-07 16:39:54 -04:00
/* we can have both of these:
* / AESV2 / Length / Standard / Length
* / Length / Standard
* make sure we don ' t mistake AES ' s length for Standard ' s */
length = pdf_readint ( q2 , len - ( q2 - q ) , " /Length " ) ;
if ( length = = ~ 0u )
length = pdf_readint ( q , len , " /Length " ) ;
if ( length < 40 ) {
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_handle_enc: invalid length: %d \n " , length ) ;
2014-04-07 16:39:54 -04:00
length = 40 ;
}
2011-05-07 18:06:06 +03:00
2014-04-07 16:39:54 -04:00
R = pdf_readint ( q , len , " /R " ) ;
if ( R = = ~ 0u ) {
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_handle_enc: invalid R \n " ) ;
noisy_warnmsg ( " pdf_handle_enc: invalid R \n " ) ;
2014-04-07 16:39:54 -04:00
break ;
}
2011-05-07 18:06:06 +03:00
2014-04-07 16:39:54 -04:00
if ( ( R > 5 ) | | ( R < 2 ) ) {
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_handle_enc: R value outside supported range [2..5] \n " ) ;
noisy_warnmsg ( " pdf_handle_enc: R value outside supported range [2..5] \n " ) ;
2014-04-07 16:39:54 -04:00
break ;
}
if ( R < 5 )
oulen = 32 ;
else
oulen = 48 ;
if ( R = = 2 | | R = = 3 ) {
2018-12-03 12:40:13 -05:00
pdf - > enc_method_stream = ENC_V2 ;
pdf - > enc_method_string = ENC_V2 ;
2014-04-07 16:39:54 -04:00
pdf - > enc_method_embeddedfile = ENC_V2 ;
} else if ( R = = 4 | | R = = 5 ) {
2018-12-03 12:40:13 -05:00
EM = pdf_readbool ( q , len , " /EncryptMetadata " , 1 ) ;
StmF = pdf_readval ( q , len , " /StmF " ) ;
StrF = pdf_readval ( q , len , " /StrF " ) ;
EFF = pdf_readval ( q , len , " /EFF " ) ;
n = len ;
pdf - > CF = pdf_getdict ( q , ( int * ) ( & n ) , " /CF " ) ;
2014-04-07 16:39:54 -04:00
pdf - > CF_n = n ;
if ( StmF )
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_handle_enc: StmF: %s \n " , StmF ) ;
2014-04-07 16:39:54 -04:00
if ( StrF )
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_handle_enc: StrF: %s \n " , StrF ) ;
2014-04-07 16:39:54 -04:00
if ( EFF )
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_handle_enc: EFF: %s \n " , EFF ) ;
2014-04-07 16:39:54 -04:00
2018-12-03 12:40:13 -05:00
pdf - > enc_method_stream = parse_enc_method ( pdf - > CF , n , StmF , ENC_IDENTITY ) ;
pdf - > enc_method_string = parse_enc_method ( pdf - > CF , n , StrF , ENC_IDENTITY ) ;
2014-04-07 16:39:54 -04:00
pdf - > enc_method_embeddedfile = parse_enc_method ( pdf - > CF , n , EFF , pdf - > enc_method_stream ) ;
free ( StmF ) ;
free ( StrF ) ;
free ( EFF ) ;
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_handle_enc: EncryptMetadata: %s \n " , EM ? " true " : " false " ) ;
2014-04-07 16:39:54 -04:00
if ( R = = 4 ) {
length = 128 ;
} else {
2018-12-03 12:40:13 -05:00
n = 0 ;
UE = pdf_readstring ( q , len , " /UE " , & n , NULL , 0 ) ;
2014-04-07 16:39:54 -04:00
length = 256 ;
}
}
if ( length = = ~ 0u )
length = 40 ;
n = 0 ;
O = pdf_readstring ( q , len , " /O " , & n , NULL , 0 ) ;
if ( ! O | | n < oulen ) {
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_handle_enc: invalid O: %d \n " , n ) ;
cli_dbgmsg ( " pdf_handle_enc: invalid O: %d \n " , n ) ;
2014-04-07 16:39:54 -04:00
if ( O )
dbg_printhex ( " invalid O " , O , n ) ;
break ;
}
if ( n > oulen ) {
2018-12-03 12:40:13 -05:00
for ( i = oulen ; i < n ; i + + )
2014-04-07 16:39:54 -04:00
if ( O [ i ] )
break ;
if ( i ! = n ) {
2018-08-14 14:00:31 -07:00
dbg_printhex ( " pdf_handle_enc: too long O " , O , n ) ;
noisy_warnmsg ( " pdf_handle_enc: too long O: %u " , n ) ;
2014-04-07 16:39:54 -04:00
break ;
}
}
n = 0 ;
U = pdf_readstring ( q , len , " /U " , & n , NULL , 0 ) ;
if ( ! U | | n < oulen ) {
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_handle_enc: invalid U: %u \n " , n ) ;
noisy_warnmsg ( " pdf_handle_enc: invalid U: %u \n " , n ) ;
2014-04-07 16:39:54 -04:00
if ( U )
dbg_printhex ( " invalid U " , U , n ) ;
break ;
}
if ( n > oulen ) {
2018-12-03 12:40:13 -05:00
for ( i = oulen ; i < n ; i + + )
2014-04-07 16:39:54 -04:00
if ( U [ i ] )
break ;
if ( i ! = n ) {
dbg_printhex ( " too long U " , U , n ) ;
break ;
}
}
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_handle_enc: Encrypt R: %d, P %x, length: %u \n " , R , P , length ) ;
2014-04-07 16:39:54 -04:00
if ( length % 8 ) {
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_handle_enc: wrong key length, not multiple of 8 \n " ) ;
noisy_warnmsg ( " pdf_handle_enc: wrong key length, not multiple of 8 \n " ) ;
2014-04-07 16:39:54 -04:00
break ;
}
check_user_password ( pdf , R , O , U , P , EM , UE , length , oulen ) ;
2011-05-07 18:06:06 +03:00
} while ( 0 ) ;
2014-04-07 16:39:54 -04:00
2011-05-07 18:06:06 +03:00
free ( O ) ;
free ( U ) ;
2011-12-15 13:27:31 +02:00
free ( UE ) ;
2011-05-07 18:06:06 +03:00
}
2018-08-14 14:00:31 -07:00
/**
2019-03-05 21:15:41 -05:00
* @ brief Search pdf buffer for objects . Parse each .
*
2018-08-14 14:00:31 -07:00
* Newly found objects will be extracted after completion when the extraction for loop continues .
2019-03-05 21:15:41 -05:00
*
* @ param pdf Pdf struct that keeps track of all information found in the PDF .
2018-08-14 14:00:31 -07:00
* @ param objstm Pointer to an object stream to parse .
2019-03-05 21:15:41 -05:00
*
2018-08-14 14:00:31 -07:00
* @ return cl_error_t Error code .
*/
cl_error_t pdf_find_and_parse_objs_in_objstm ( struct pdf_struct * pdf , struct objstm_struct * objstm )
{
2019-03-05 21:15:41 -05:00
cl_error_t status = CL_EFORMAT ;
cl_error_t retval = CL_EPARSE ;
int32_t alerts = 0 ;
2018-08-14 14:00:31 -07:00
uint32_t badobjects = 0 ;
2018-12-03 12:40:13 -05:00
size_t i = 0 ;
2018-08-14 14:00:31 -07:00
2018-12-03 12:40:13 -05:00
struct pdf_obj * obj = NULL ;
2018-08-14 14:00:31 -07:00
2019-01-22 14:15:46 -05:00
if ( ( NULL = = objstm ) | | ( NULL = = objstm - > streambuf ) ) {
status = CL_EARG ;
goto done ;
}
2018-12-03 12:40:13 -05:00
if ( ( 0 = = objstm - > first ) | |
( 0 = = objstm - > streambuf_len ) | |
( 0 = = objstm - > n ) ) {
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_find_and_parse_objs_in_objstm: Empty object stream. \n " ) ;
goto done ;
}
2018-12-03 12:40:13 -05:00
if ( objstm - > first > = objstm - > streambuf_len ) {
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_find_and_parse_objs_in_objstm: Invalid objstm values. Offset of first obj greater than stream length. \n " ) ;
goto done ;
}
/* Process each object */
2018-12-03 12:40:13 -05:00
for ( i = 0 ; i < objstm - > n ; i + + ) {
2018-08-14 14:00:31 -07:00
obj = NULL ;
if ( cli_checktimelimit ( pdf - > ctx ) ! = CL_SUCCESS ) {
cli_errmsg ( " Timeout reached in the PDF parser while parsing object stream. \n " ) ;
status = CL_ETIMEOUT ;
goto done ;
}
/* Find object */
retval = pdf_findobj_in_objstm ( pdf , objstm , & obj ) ;
2018-12-03 12:40:13 -05:00
if ( retval ! = CL_SUCCESS ) {
2018-08-14 14:00:31 -07:00
cli_dbgmsg ( " pdf_find_and_parse_objs_in_objstm: Fewer objects in stream than expected: %u found, %u expected. \n " ,
2018-12-03 12:40:13 -05:00
objstm - > nobjs_found , objstm - > n ) ;
2018-08-14 14:00:31 -07:00
badobjects + + ;
pdf - > stats . ninvalidobjs + + ;
break ;
}
cli_dbgmsg ( " pdf_find_and_parse_objs_in_objstm: Found object %u %u in object stream at offset: %u \n " , obj - > id > > 8 , obj - > id & 0xff , obj - > start ) ;
if ( cli_checktimelimit ( pdf - > ctx ) ! = CL_SUCCESS ) {
cli_errmsg ( " Timeout reached in the PDF parser while parsing object stream. \n " ) ;
status = CL_ETIMEOUT ;
goto done ;
}
/* Parse object */
pdf_parseobj ( pdf , obj ) ;
}
if ( alerts ) {
status = CL_VIRUS ;
goto done ;
2018-12-03 12:40:13 -05:00
} else if ( badobjects ) {
2018-08-14 14:00:31 -07:00
status = CL_EFORMAT ;
goto done ;
}
2018-12-03 12:40:13 -05:00
2018-08-14 14:00:31 -07:00
status = CL_SUCCESS ;
done :
return status ;
}
/**
* @ brief Search pdf buffer for objects . Parse each and then extract each .
2019-03-05 21:15:41 -05:00
*
2018-08-14 14:00:31 -07:00
* @ param pdf Pdf struct that keeps track of all information found in the PDF .
* @ param alerts [ in / out ] The number of alerts , relevant in ALLMATCH mode .
2019-03-05 21:15:41 -05:00
*
2018-08-14 14:00:31 -07:00
* @ return cl_error_t Error code .
*/
cl_error_t pdf_find_and_extract_objs ( struct pdf_struct * pdf , uint32_t * alerts )
{
2019-03-05 21:15:41 -05:00
cl_error_t status = CL_SUCCESS ;
int32_t rv = 0 ;
unsigned int i = 0 ;
2018-08-14 14:00:31 -07:00
uint32_t badobjects = 0 ;
2018-12-03 12:40:13 -05:00
cli_ctx * ctx = pdf - > ctx ;
2018-08-14 14:00:31 -07:00
2018-11-14 16:58:30 -05:00
if ( NULL = = pdf | | NULL = = alerts ) {
cli_errmsg ( " pdf_find_and_extract_objs: Invalid arguments. \n " ) ;
status = CL_EARG ;
goto done ;
}
2018-08-14 14:00:31 -07:00
/* parse PDF and find obj offsets */
while ( CL_BREAK ! = ( rv = pdf_findobj ( pdf ) ) ) {
if ( rv = = CL_EMEM ) {
break ;
}
}
if ( rv = = - 1 )
pdf - > flags | = 1 < < BAD_PDF_TOOMANYOBJS ;
/* must parse after finding all objs, so we can flag indirect objects */
2018-12-03 12:40:13 -05:00
for ( i = 0 ; i < pdf - > nobjs ; i + + ) {
2018-08-14 14:00:31 -07:00
struct pdf_obj * obj = pdf - > objs [ i ] ;
if ( cli_checktimelimit ( pdf - > ctx ) ! = CL_SUCCESS ) {
cli_errmsg ( " pdf_find_and_extract_objs: Timeout reached in the PDF parser while parsing objects. \n " ) ;
status = CL_ETIMEOUT ;
goto done ;
}
pdf_parseobj ( pdf , obj ) ;
}
pdf_handle_enc ( pdf ) ;
if ( pdf - > flags & ( 1 < < ENCRYPTED_PDF ) )
cli_dbgmsg ( " pdf_find_and_extract_objs: encrypted pdf found, %s! \n " ,
2018-12-03 12:40:13 -05:00
( pdf - > flags & ( 1 < < DECRYPTABLE_PDF ) ) ? " decryptable " : " not decryptable, stream will probably fail to decompress " ) ;
2018-08-14 14:00:31 -07:00
2018-10-10 06:02:28 -07:00
if ( SCAN_HEURISTIC_ENCRYPTED_DOC & &
2018-12-03 12:40:13 -05:00
( pdf - > flags & ( 1 < < ENCRYPTED_PDF ) ) & &
! ( pdf - > flags & ( 1 < < DECRYPTABLE_PDF ) ) ) {
2018-08-14 14:00:31 -07:00
/* It is encrypted, and a password/key needs to be supplied to decrypt.
* This doesn ' t trigger for PDFs that are encrypted but don ' t need
* a password to decrypt */
status = cli_append_virus ( pdf - > ctx , " Heuristics.Encrypted.PDF " ) ;
2018-12-03 12:40:13 -05:00
if ( status = = CL_VIRUS ) {
2018-11-14 16:58:30 -05:00
* alerts + + ;
2018-07-20 22:28:48 -04:00
if ( SCAN_ALLMATCHES )
2018-08-14 14:00:31 -07:00
status = CL_CLEAN ;
}
}
if ( ! status ) {
status = run_pdf_hooks ( pdf , PDF_PHASE_PARSED , - 1 , - 1 ) ;
cli_dbgmsg ( " pdf_find_and_extract_objs: (parsed hooks) returned %d \n " , status ) ;
if ( status = = CL_VIRUS ) {
2018-11-14 16:58:30 -05:00
* alerts + + ;
2018-07-20 22:28:48 -04:00
if ( SCAN_ALLMATCHES ) {
2018-08-14 14:00:31 -07:00
status = CL_CLEAN ;
}
}
}
/* extract PDF objs */
2018-12-03 12:40:13 -05:00
for ( i = 0 ; ! status & & i < pdf - > nobjs ; i + + ) {
2018-08-14 14:00:31 -07:00
struct pdf_obj * obj = pdf - > objs [ i ] ;
if ( cli_checktimelimit ( pdf - > ctx ) ! = CL_SUCCESS ) {
cli_errmsg ( " pdf_find_and_extract_objs: Timeout reached in the PDF parser while extracting objects. \n " ) ;
status = CL_ETIMEOUT ;
goto done ;
}
status = pdf_extract_obj ( pdf , obj , PDF_EXTRACT_OBJ_SCAN ) ;
switch ( status ) {
case CL_EFORMAT :
/* Don't halt on one bad object */
cli_dbgmsg ( " pdf_find_and_extract_objs: Format error when extracting object, skipping to the next object. \n " ) ;
badobjects + + ;
pdf - > stats . ninvalidobjs + + ;
status = CL_CLEAN ;
break ;
case CL_VIRUS :
2018-11-14 16:58:30 -05:00
* alerts + + ;
2018-07-20 22:28:48 -04:00
if ( SCAN_ALLMATCHES ) {
2018-08-14 14:00:31 -07:00
status = CL_CLEAN ;
}
break ;
default :
break ;
}
}
done :
if ( ! status & & badobjects ) {
status = CL_EFORMAT ;
}
return status ;
}
/**
* @ brief Primary function for parsing and scanning a PDF .
2019-03-05 21:15:41 -05:00
*
2018-08-14 14:00:31 -07:00
* @ param dir Filepath for temp file .
2019-03-05 21:15:41 -05:00
* @ param ctx clam scan context structure .
2018-08-14 14:00:31 -07:00
* @ param offset offset of pdf in ctx - > fmap
2019-03-05 21:15:41 -05:00
*
2018-08-14 14:00:31 -07:00
* @ return int Returns cl_error_t status value .
*/
2010-05-10 11:57:44 +03:00
int cli_pdf ( const char * dir , cli_ctx * ctx , off_t offset )
{
2018-08-14 14:00:31 -07:00
cl_error_t rc = CL_SUCCESS ;
2010-05-10 11:57:44 +03:00
struct pdf_struct pdf ;
2018-12-03 12:40:13 -05:00
fmap_t * map = * ctx - > fmap ;
size_t size = map - > len - offset ;
2010-05-10 11:57:44 +03:00
off_t versize = size > 1032 ? 1032 : size ;
off_t map_off , bytesleft ;
2018-06-12 20:47:21 -04:00
unsigned long xref ;
2019-01-22 14:15:46 -05:00
long temp_long ;
2018-03-08 12:17:11 -05:00
const char * pdfver , * tmp , * start , * eofmap , * q , * eof ;
2013-06-20 13:43:46 -04:00
unsigned i , alerts = 0 ;
2018-08-14 14:00:31 -07:00
unsigned int objs_found = 0 ;
2014-06-25 13:36:30 -04:00
# if HAVE_JSON
2018-12-03 12:40:13 -05:00
json_object * pdfobj = NULL ;
2014-06-30 14:06:37 -04:00
char * begin , * end , * p1 ;
2014-06-25 13:36:30 -04:00
# endif
2010-05-10 11:57:44 +03:00
cli_dbgmsg ( " in cli_pdf(%s) \n " , dir ) ;
memset ( & pdf , 0 , sizeof ( pdf ) ) ;
2018-12-03 12:40:13 -05:00
pdf . ctx = ctx ;
pdf . dir = dir ;
2011-05-07 18:06:06 +03:00
pdf . enc_objid = ~ 0u ;
2010-05-10 11:57:44 +03:00
pdfver = start = fmap_need_off_once ( map , offset , versize ) ;
/* Check PDF version */
if ( ! pdfver ) {
2014-04-07 16:39:54 -04:00
cli_errmsg ( " cli_pdf: mmap() failed (1) \n " ) ;
2018-08-14 14:00:31 -07:00
rc = CL_EMAP ;
goto done ;
2010-05-10 11:57:44 +03:00
}
2014-04-07 16:39:54 -04:00
2014-06-25 13:36:30 -04:00
# if HAVE_JSON
if ( ctx - > wrkproperty )
pdfobj = cli_jsonobj ( ctx - > wrkproperty , " PDFStats " ) ;
# endif
2010-05-10 11:57:44 +03:00
/* offset is 0 when coming from filetype2 */
2018-03-08 12:17:11 -05:00
tmp = cli_memstr ( pdfver , versize , " %PDF- " , 5 ) ;
if ( ! tmp ) {
2014-04-07 16:39:54 -04:00
cli_dbgmsg ( " cli_pdf: no PDF- header found \n " ) ;
noisy_warnmsg ( " cli_pdf: no PDF- header found \n " ) ;
2014-07-08 19:53:41 -04:00
# if HAVE_JSON
2014-05-24 11:15:54 -04:00
pdf_export_json ( & pdf ) ;
2014-07-08 19:53:41 -04:00
# endif
2018-08-14 14:00:31 -07:00
rc = CL_SUCCESS ;
goto done ;
2010-05-10 11:57:44 +03:00
}
2014-04-07 16:39:54 -04:00
2018-03-08 12:17:11 -05:00
versize - = tmp - pdfver ;
pdfver = tmp ;
if ( versize < 8 ) {
2018-08-14 14:00:31 -07:00
rc = CL_EFORMAT ;
goto done ;
2018-03-08 12:17:11 -05:00
}
2014-04-07 16:39:54 -04:00
/* Check for PDF-1.[0-9]. Although 1.7 is highest now, allow for future versions */
2010-05-10 11:57:44 +03:00
if ( pdfver [ 5 ] ! = ' 1 ' | | pdfver [ 6 ] ! = ' . ' | |
2014-04-07 16:39:54 -04:00
pdfver [ 7 ] < ' 1 ' | | pdfver [ 7 ] > ' 9 ' ) {
pdf . flags | = 1 < < BAD_PDF_VERSION ;
cli_dbgmsg ( " cli_pdf: bad pdf version: %.8s \n " , pdfver ) ;
2014-06-25 13:36:30 -04:00
# if HAVE_JSON
if ( pdfobj )
cli_jsonbool ( pdfobj , " BadVersion " , 1 ) ;
2014-06-30 14:06:37 -04:00
# endif
} else {
# if HAVE_JSON
if ( pdfobj ) {
2018-12-03 12:40:13 -05:00
begin = ( char * ) ( pdfver + 5 ) ;
end = begin + 2 ;
2014-06-30 14:06:37 -04:00
strtoul ( end , & end , 10 ) ;
p1 = cli_calloc ( ( end - begin ) + 2 , 1 ) ;
if ( p1 ) {
strncpy ( p1 , begin , end - begin ) ;
p1 [ end - begin ] = ' \0 ' ;
cli_jsonstr ( pdfobj , " PDFVersion " , p1 ) ;
free ( p1 ) ;
}
}
2014-06-25 13:36:30 -04:00
# endif
2010-05-10 11:57:44 +03:00
}
2014-04-07 16:39:54 -04:00
2010-05-10 11:57:44 +03:00
if ( pdfver ! = start | | offset ) {
2014-04-07 16:39:54 -04:00
pdf . flags | = 1 < < BAD_PDF_HEADERPOS ;
2017-09-21 14:26:37 -04:00
cli_dbgmsg ( " cli_pdf: PDF header is not at position 0: %lld \n " , ( long long ) ( pdfver - start + offset ) ) ;
2014-06-25 13:36:30 -04:00
# if HAVE_JSON
if ( pdfobj )
cli_jsonbool ( pdfobj , " BadVersionLocation " , 1 ) ;
# endif
2010-05-10 11:57:44 +03:00
}
2014-04-07 16:39:54 -04:00
2010-05-10 11:57:44 +03:00
offset + = pdfver - start ;
/* find trailer and xref, don't fail if not found */
2010-09-28 12:42:41 +03:00
map_off = ( off_t ) map - > len - 2048 ;
2010-05-10 11:57:44 +03:00
if ( map_off < 0 )
2014-04-07 16:39:54 -04:00
map_off = 0 ;
2010-05-10 11:57:44 +03:00
bytesleft = map - > len - map_off ;
2014-04-07 16:39:54 -04:00
2010-05-10 11:57:44 +03:00
eofmap = fmap_need_off_once ( map , map_off , bytesleft ) ;
if ( ! eofmap ) {
2014-04-07 16:39:54 -04:00
cli_errmsg ( " cli_pdf: mmap() failed (2) \n " ) ;
2018-08-14 14:00:31 -07:00
rc = CL_EMAP ;
goto done ;
2010-05-10 11:57:44 +03:00
}
2014-04-07 16:39:54 -04:00
2010-05-10 11:57:44 +03:00
eof = eofmap + bytesleft ;
2018-12-03 12:40:13 -05:00
for ( q = & eofmap [ bytesleft - 5 ] ; q > eofmap ; q - - ) {
2014-04-07 16:39:54 -04:00
if ( memcmp ( q , " %%EOF " , 5 ) = = 0 )
break ;
2010-05-10 11:57:44 +03:00
}
2014-04-07 16:39:54 -04:00
2010-05-10 11:57:44 +03:00
if ( q < = eofmap ) {
2014-04-07 16:39:54 -04:00
pdf . flags | = 1 < < BAD_PDF_TRAILER ;
cli_dbgmsg ( " cli_pdf: %%%%EOF not found \n " ) ;
2014-06-25 16:26:33 -04:00
# if HAVE_JSON
if ( pdfobj )
cli_jsonbool ( pdfobj , " NoEOF " , 1 ) ;
# endif
2010-05-10 11:57:44 +03:00
} else {
2014-04-07 16:39:54 -04:00
const char * t ;
/*size = q - eofmap + map_off;*/
q - = 9 ;
2018-12-03 12:40:13 -05:00
for ( ; q > eofmap ; q - - ) {
2014-04-07 16:39:54 -04:00
if ( memcmp ( q , " startxref " , 9 ) = = 0 )
break ;
}
if ( q < = eofmap ) {
pdf . flags | = 1 < < BAD_PDF_TRAILER ;
cli_dbgmsg ( " cli_pdf: startxref not found \n " ) ;
2014-06-25 16:26:33 -04:00
# if HAVE_JSON
if ( pdfobj )
cli_jsonbool ( pdfobj , " NoXREF " , 1 ) ;
# endif
2014-04-07 16:39:54 -04:00
} else {
2018-12-03 12:40:13 -05:00
for ( t = q ; t > eofmap ; t - - ) {
if ( memcmp ( t , " trailer " , 7 ) = = 0 )
2014-04-07 16:39:54 -04:00
break ;
}
pdf_parse_trailer ( & pdf , eofmap , eof - eofmap ) ;
q + = 9 ;
2018-12-03 12:40:13 -05:00
while ( q < eof & & ( * q = = ' ' | | * q = = ' \n ' | | * q = = ' \r ' ) ) {
q + + ;
}
2014-04-07 16:39:54 -04:00
2019-01-22 14:15:46 -05:00
if ( CL_SUCCESS ! = cli_strntol_wrap ( q , q - eofmap + map_off , 0 , 10 , & temp_long ) ) {
2018-06-02 20:58:35 -04:00
cli_dbgmsg ( " cli_pdf: failed to parse PDF trailer xref \n " ) ;
2014-04-07 16:39:54 -04:00
pdf . flags | = 1 < < BAD_PDF_TRAILER ;
2019-01-22 14:15:46 -05:00
} else if ( temp_long < 0 ) {
cli_dbgmsg ( " cli_pdf: Encountered invalid negative PDF trailer xref (%ld). \n " , temp_long ) ;
pdf . flags | = 1 < < BAD_PDF_TRAILER ;
2018-12-03 12:40:13 -05:00
} else {
2019-01-22 14:15:46 -05:00
xref = ( unsigned long ) temp_long ;
2018-06-02 20:58:35 -04:00
bytesleft = map - > len - offset - xref ;
if ( bytesleft > 4096 )
bytesleft = 4096 ;
q = fmap_need_off_once ( map , offset + xref , bytesleft ) ;
2018-12-03 12:40:13 -05:00
if ( ! q | | xrefCheck ( q , q + bytesleft ) = = - 1 ) {
2018-06-02 20:58:35 -04:00
cli_dbgmsg ( " cli_pdf: did not find valid xref \n " ) ;
pdf . flags | = 1 < < BAD_PDF_TRAILER ;
}
}
2014-04-07 16:39:54 -04:00
}
2010-05-10 11:57:44 +03:00
}
2014-04-07 16:39:54 -04:00
size - = offset ;
2010-05-10 11:57:44 +03:00
pdf . size = size ;
2018-12-03 12:40:13 -05:00
pdf . map = fmap_need_off ( map , offset , size ) ;
2010-05-10 11:57:44 +03:00
if ( ! pdf . map ) {
2014-04-07 16:39:54 -04:00
cli_errmsg ( " cli_pdf: mmap() failed (3) \n " ) ;
2018-08-14 14:00:31 -07:00
rc = CL_EMAP ;
goto done ;
2010-05-10 11:57:44 +03:00
}
2014-04-07 16:39:54 -04:00
pdf . startoff = offset ;
2010-09-10 22:11:32 +03:00
rc = run_pdf_hooks ( & pdf , PDF_PHASE_PRE , - 1 , - 1 ) ;
2018-07-20 22:28:48 -04:00
if ( ( rc = = CL_VIRUS ) & & SCAN_ALLMATCHES ) {
2013-06-20 13:43:46 -04:00
cli_dbgmsg ( " cli_pdf: (pre hooks) returned %d \n " , rc ) ;
alerts + + ;
rc = CL_CLEAN ;
2014-04-07 16:39:54 -04:00
} else if ( rc ) {
cli_dbgmsg ( " cli_pdf: (pre hooks) returning %d \n " , rc ) ;
2014-06-30 14:35:42 -04:00
2018-08-14 14:00:31 -07:00
rc = rc = = CL_BREAK ? CL_CLEAN : rc ;
goto done ;
2013-06-20 13:43:46 -04:00
}
2018-08-14 14:00:31 -07:00
/*
2019-03-05 21:15:41 -05:00
* Find and extract all objects in the PDF .
2018-08-14 14:00:31 -07:00
* New experimental recursive methodology that adds objects from object streams .
*/
objs_found = pdf . nobjs ;
2018-12-03 12:40:13 -05:00
rc = pdf_find_and_extract_objs ( & pdf , & alerts ) ;
2014-06-30 14:35:42 -04:00
2018-08-14 14:00:31 -07:00
if ( pdf . nobjs < = objs_found ) {
cli_dbgmsg ( " cli_pdf: pdf_find_and_extract_objs did not find any new objects! \n " ) ;
} else {
cli_dbgmsg ( " cli_pdf: pdf_find_and_extract_objs found %d new objects. \n " , pdf . nobjs - objs_found ) ;
2010-05-11 10:37:10 +03:00
}
2010-07-30 16:54:53 +03:00
if ( pdf . flags & ( 1 < < ENCRYPTED_PDF ) )
2018-12-03 12:40:13 -05:00
pdf . flags & = ~ ( ( 1 < < BAD_FLATESTART ) | ( 1 < < BAD_STREAMSTART ) | ( 1 < < BAD_ASCIIDECODE ) ) ;
2010-07-30 16:54:53 +03:00
2018-08-14 14:00:31 -07:00
if ( pdf . flags & & ! rc ) {
2014-04-07 16:39:54 -04:00
cli_dbgmsg ( " cli_pdf: flags 0x%02x \n " , pdf . flags ) ;
rc = run_pdf_hooks ( & pdf , PDF_PHASE_END , - 1 , - 1 ) ;
2013-06-20 13:43:46 -04:00
if ( rc = = CL_VIRUS ) {
alerts + + ;
2018-07-20 22:28:48 -04:00
if ( SCAN_ALLMATCHES ) {
2013-06-20 13:43:46 -04:00
rc = CL_CLEAN ;
}
}
2014-04-07 16:39:54 -04:00
2018-07-20 22:28:48 -04:00
if ( ! rc & & SCAN_HEURISTICS & & ( ctx - > dconf - > other & OTHER_CONF_PDFNAMEOBJ ) ) {
2013-06-20 13:43:46 -04:00
if ( pdf . flags & ( 1 < < ESCAPED_COMMON_PDFNAME ) ) {
/* for example /Fl#61te#44#65#63#6f#64#65 instead of /FlateDecode */
2017-04-18 12:03:36 -04:00
cli_append_possibly_unwanted ( ctx , " Heuristics.PDF.ObfuscatedNameObject " ) ;
2013-06-20 13:43:46 -04:00
}
}
2010-07-30 14:24:52 +03:00
#if 0
2018-06-01 14:23:25 -04:00
/* TODO: find both trailers, and /Encrypt settings */
if ( pdf . flags & ( 1 < < LINEARIZED_PDF ) )
pdf . flags & = ~ ( 1 < < BAD_ASCIIDECODE ) ;
if ( pdf . flags & ( 1 < < MANY_FILTERS ) )
pdf . flags & = ~ ( 1 < < BAD_ASCIIDECODE ) ;
if ( ! rc & & ( pdf . flags &
( ( 1 < < BAD_PDF_TOOMANYOBJS ) | ( 1 < < BAD_STREAM_FILTERS ) |
( 1 < < BAD_FLATE ) | ( 1 < < BAD_ASCIIDECODE ) |
( 1 < < UNTERMINATED_OBJ_DICT ) | ( 1 < < UNKNOWN_FILTER ) ) ) ) {
rc = CL_EUNPACK ;
}
2010-07-30 14:23:10 +03:00
# endif
2010-05-11 11:26:35 +03:00
}
2012-12-17 11:12:11 -05:00
2018-08-14 14:00:31 -07:00
done :
2013-06-20 13:43:46 -04:00
if ( alerts ) {
rc = CL_VIRUS ;
2018-12-03 12:40:13 -05:00
} else if ( ! rc & & pdf . stats . ninvalidobjs > 0 ) {
2012-12-17 11:12:11 -05:00
rc = CL_EFORMAT ;
}
2014-07-08 19:53:41 -04:00
# if HAVE_JSON
2014-04-29 17:27:02 -04:00
pdf_export_json ( & pdf ) ;
2014-07-08 19:53:41 -04:00
# endif
2014-04-29 17:27:02 -04:00
2018-08-14 14:00:31 -07:00
if ( pdf . objstms ) {
for ( i = 0 ; i < pdf . nobjstms ; i + + ) {
if ( pdf . objstms [ i ] ) {
if ( pdf . objstms [ i ] - > streambuf ) {
free ( pdf . objstms [ i ] - > streambuf ) ;
pdf . objstms [ i ] - > streambuf = NULL ;
}
free ( pdf . objstms [ i ] ) ;
pdf . objstms [ i ] = NULL ;
}
}
free ( pdf . objstms ) ;
pdf . objstms = NULL ;
}
if ( NULL ! = pdf . objs ) {
for ( i = 0 ; i < pdf . nobjs ; i + + ) {
if ( NULL ! = pdf . objs [ i ] ) {
free ( pdf . objs [ i ] ) ;
pdf . objs [ i ] = NULL ;
}
}
free ( pdf . objs ) ;
pdf . objs = NULL ;
}
if ( pdf . fileID ) {
free ( pdf . fileID ) ;
pdf . fileID = NULL ;
}
if ( pdf . key ) {
free ( pdf . key ) ;
pdf . key = NULL ;
}
2012-12-17 11:12:11 -05:00
2010-11-15 23:27:10 +02:00
/* PDF hooks may abort, don't return CL_BREAK to caller! */
2018-08-14 14:00:31 -07:00
rc = ( rc = = CL_BREAK ) ? CL_CLEAN : rc ;
cli_dbgmsg ( " cli_pdf: returning %d \n " , rc ) ;
return rc ;
2010-05-10 11:57:44 +03:00
}
2018-08-14 14:00:31 -07:00
/**
 * @brief Skip the rest of the current line, and find the start of the next line.
 *
 * A "line ending" byte here is '\r', '\n' or NUL (NUL is included because the
 * original test was strchr("\r\n", c), which also matches the terminator).
 *
 * @param ptr   Current offset into buffer.
 * @param len   Remaining bytes in buffer.
 *
 * @return const char*  Address of next line, or NULL if no next line in buffer.
 */
static const char *
pdf_nextlinestart(const char *ptr, size_t len)
{
    const char *cursor = ptr;
    size_t remaining   = len;

    if ((NULL == cursor) || (0 == remaining)) {
        /* Invalid args */
        return NULL;
    }

    /* Step 1: run forward to the first line-ending byte. */
    for (; !(*cursor == '\r' || *cursor == '\n' || *cursor == '\0'); cursor++) {
        remaining--;
        if (0 == remaining) {
            return NULL;
        }
    }

    /* Step 2: run past every consecutive line-ending byte. */
    for (; (*cursor == '\r' || *cursor == '\n' || *cursor == '\0'); cursor++) {
        remaining--;
        if (0 == remaining) {
            return NULL;
        }
    }

    return cursor;
}
2005-05-27 14:44:00 +00:00
2018-08-14 14:00:31 -07:00
/**
 * @brief Return the start of the next PDF object.
 *
 * This assumes that we're not in a stream.
 *
 * The scan first walks over the token it starts in; once a separator
 * (whitespace, '[', '<', a line ending or a '%' comment) has been passed, the
 * next ordinary byte is reported. '/' and '(' are reported immediately,
 * wherever they occur.
 *
 * @param ptr   Current offset into buffer.
 * @param len   Remaining bytes in buffer.
 *
 * @return const char*  Address of next object in the buffer, or NULL if there is none in the buffer.
 */
static const char *
pdf_nextobject(const char *ptr, size_t len)
{
    /* Non-zero while still inside the token the scan started in. */
    int in_first_token = 1;

    while (0 != len) {
        const char ch = *ptr;

        /* Start of a name object, or start of JS */
        if (ch == '/' || ch == '(') {
            return ptr;
        }

        /* Line ending or comment: resume at the start of the next line. */
        if (ch == '\n' || ch == '\r' || ch == '%') {
            const char *next_line = pdf_nextlinestart(ptr, len);

            if (NULL == next_line) {
                return NULL;
            }

            len -= (size_t)(next_line - ptr);
            ptr            = next_line;
            in_first_token = 0;
            continue;
        }

        if (ch == ' ' || ch == '\t' || ch == '\v' || ch == '\f' ||
            ch == '[' /* Start of an array object */ ||
            ch == '<' /* Start of a dictionary object */) {
            in_first_token = 0;
        } else if (!in_first_token) {
            /* TODO: parse and return object type */
            return ptr;
        }

        ptr++;
        len--;
    }

    return NULL;
}
2014-04-16 14:23:16 -04:00
/* PDF statistics */
2014-07-08 19:53:41 -04:00
#if HAVE_JSON
/* Statistics callback: adds one to pdf->stats.nasciihexdecode.
 * obj and act are not used; a NULL pdf is ignored. */
static void ASCIIHexDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
{
    UNUSEDPARAM(obj);
    UNUSEDPARAM(act);

    if (NULL != pdf) {
        pdf->stats.nasciihexdecode += 1;
    }
}
#endif
2014-04-16 14:23:16 -04:00
2014-07-08 19:53:41 -04:00
#if HAVE_JSON
/* Statistics callback: adds one to pdf->stats.nascii85decode.
 * obj and act are not used; a NULL pdf is ignored. */
static void ASCII85Decode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
{
    UNUSEDPARAM(obj);
    UNUSEDPARAM(act);

    if (NULL != pdf) {
        pdf->stats.nascii85decode += 1;
    }
}
#endif
2014-04-16 14:23:16 -04:00
2014-07-08 19:53:41 -04:00
#if HAVE_JSON
/* Statistics callback: adds one to pdf->stats.nembeddedfile.
 * obj and act are not used; a NULL pdf is ignored. */
static void EmbeddedFile_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
{
    UNUSEDPARAM(obj);
    UNUSEDPARAM(act);

    if (NULL != pdf) {
        pdf->stats.nembeddedfile += 1;
    }
}
#endif
2014-04-16 14:23:16 -04:00
2014-07-08 19:53:41 -04:00
#if HAVE_JSON
/* Statistics callback: adds one to pdf->stats.nflate.
 * obj and act are not used; a NULL pdf is ignored. */
static void FlateDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
{
    UNUSEDPARAM(obj);
    UNUSEDPARAM(act);

    if (NULL != pdf) {
        pdf->stats.nflate += 1;
    }
}
#endif
2014-04-16 14:23:16 -04:00
2014-07-08 19:53:41 -04:00
#if HAVE_JSON
/* Statistics callback: adds one to pdf->stats.nimage.
 * obj and act are not used; a NULL pdf is ignored. */
static void Image_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
{
    UNUSEDPARAM(obj);
    UNUSEDPARAM(act);

    if (NULL != pdf) {
        pdf->stats.nimage += 1;
    }
}
#endif
2014-04-16 14:23:16 -04:00
2014-07-08 19:53:41 -04:00
#if HAVE_JSON
/* Statistics callback: adds one to pdf->stats.nlzw.
 * obj and act are not used; a NULL pdf is ignored. */
static void LZWDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
{
    UNUSEDPARAM(obj);
    UNUSEDPARAM(act);

    if (NULL != pdf) {
        pdf->stats.nlzw += 1;
    }
}
#endif
2014-04-16 14:23:16 -04:00
2014-07-08 19:53:41 -04:00
#if HAVE_JSON
/* Statistics callback: adds one to pdf->stats.nrunlengthdecode.
 * obj and act are not used; a NULL pdf is ignored. */
static void RunLengthDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
{
    UNUSEDPARAM(obj);
    UNUSEDPARAM(act);

    if (NULL != pdf) {
        pdf->stats.nrunlengthdecode += 1;
    }
}
#endif
2014-04-16 14:23:16 -04:00
2014-07-08 19:53:41 -04:00
#if HAVE_JSON
/* Statistics callback: adds one to pdf->stats.nfaxdecode.
 * obj and act are not used; a NULL pdf is ignored. */
static void CCITTFaxDecode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
{
    UNUSEDPARAM(obj);
    UNUSEDPARAM(act);

    if (NULL != pdf) {
        pdf->stats.nfaxdecode += 1;
    }
}
#endif
2014-04-16 14:23:16 -04:00
2014-07-08 19:53:41 -04:00
#if HAVE_JSON
/* Statistics callback: when metadata collection is enabled and the scan
 * context has a JSON work property, appends the object's number (obj->id >> 8)
 * to the "JBIG2Objects" array under "PDFStats" and adds one to
 * pdf->stats.njbig2decode. The counter is only incremented when the JSON
 * array could be obtained. act is not used; a NULL pdf or obj is ignored. */
static void JBIG2Decode_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname_action *act)
{
    /* Not referenced by name below; presumably SCAN_COLLECT_METADATA expands
     * to a use of a local called `ctx` - keep the name. */
    cli_ctx *ctx = NULL;
    struct json_object *pdfobj, *jbig2arr;

    UNUSEDPARAM(act);

    /* Validate before touching pdf->ctx or obj->id. */
    if (!(pdf) || !(obj))
        return;

    ctx = pdf->ctx;

    if (!(SCAN_COLLECT_METADATA))
        return;

    if (!(pdf->ctx->wrkproperty))
        return;

    pdfobj = cli_jsonobj(pdf->ctx->wrkproperty, "PDFStats");
    if (!(pdfobj))
        return;

    jbig2arr = cli_jsonarray(pdfobj, "JBIG2Objects");
    if (!(jbig2arr))
        return;

    cli_jsonint_array(jbig2arr, obj->id >> 8);

    pdf->stats.njbig2decode++;
}
#endif
2014-04-16 14:23:16 -04:00
2014-07-08 19:53:41 -04:00
# if HAVE_JSON
2014-06-25 14:06:17 -04:00
static void DCTDecode_cb ( struct pdf_struct * pdf , struct pdf_obj * obj , struct pdfname_action * act )
2014-04-16 14:23:16 -04:00
{
2014-07-10 18:11:49 -04:00
UNUSEDPARAM ( obj ) ;
UNUSEDPARAM ( act ) ;
2014-04-16 14:23:16 -04:00
if ( ! ( pdf ) )
return ;
pdf - > stats . ndctdecode + + ;
}
2014-07-08 19:53:41 -04:00
# endif
2014-04-16 14:23:16 -04:00
2014-07-08 19:53:41 -04:00
# if HAVE_JSON
2014-06-25 14:06:17 -04:00
static void JPXDecode_cb ( struct pdf_struct * pdf , struct pdf_obj * obj , struct pdfname_action * act )
2014-04-16 14:23:16 -04:00
{
2014-07-10 18:11:49 -04:00
UNUSEDPARAM ( obj ) ;
UNUSEDPARAM ( act ) ;
2014-04-16 14:23:16 -04:00
if ( ! ( pdf ) )
return ;
pdf - > stats . njpxdecode + + ;
}
2014-07-08 19:53:41 -04:00
# endif
2014-04-16 14:23:16 -04:00
2014-07-08 19:53:41 -04:00
# if HAVE_JSON
2014-06-25 14:06:17 -04:00
static void Crypt_cb ( struct pdf_struct * pdf , struct pdf_obj * obj , struct pdfname_action * act )
2014-04-16 14:23:16 -04:00
{
2014-07-10 18:11:49 -04:00
UNUSEDPARAM ( obj ) ;
UNUSEDPARAM ( act ) ;
2014-04-16 14:23:16 -04:00
if ( ! ( pdf ) )
return ;
pdf - > stats . ncrypt + + ;
}
2014-07-08 19:53:41 -04:00
# endif
2014-04-16 14:23:16 -04:00
2014-07-08 19:53:41 -04:00
# if HAVE_JSON
2014-06-25 14:06:17 -04:00
static void Standard_cb ( struct pdf_struct * pdf , struct pdf_obj * obj , struct pdfname_action * act )
2014-04-16 14:23:16 -04:00
{
2014-07-10 18:11:49 -04:00
UNUSEDPARAM ( obj ) ;
UNUSEDPARAM ( act ) ;
2014-04-16 14:23:16 -04:00
if ( ! ( pdf ) )
return ;
pdf - > stats . nstandard + + ;
}
2014-07-08 19:53:41 -04:00
# endif
2014-04-16 14:23:16 -04:00
2014-07-08 19:53:41 -04:00
# if HAVE_JSON
2014-06-25 14:06:17 -04:00
static void Sig_cb ( struct pdf_struct * pdf , struct pdf_obj * obj , struct pdfname_action * act )
2014-04-16 14:23:16 -04:00
{
2014-07-10 18:11:49 -04:00
UNUSEDPARAM ( obj ) ;
UNUSEDPARAM ( act ) ;
2014-04-16 14:23:16 -04:00
if ( ! ( pdf ) )
return ;
pdf - > stats . nsigned + + ;
}
2014-07-08 19:53:41 -04:00
# endif
2014-04-16 14:23:16 -04:00
2014-07-08 19:53:41 -04:00
# if HAVE_JSON
2014-06-25 14:06:17 -04:00
static void JavaScript_cb ( struct pdf_struct * pdf , struct pdf_obj * obj , struct pdfname_action * act )
2014-04-16 14:23:16 -04:00
{
2018-07-20 22:28:48 -04:00
cli_ctx * ctx = pdf - > ctx ;
2014-07-10 18:11:49 -04:00
struct json_object * pdfobj , * jbig2arr ;
UNUSEDPARAM ( act ) ;
2014-06-17 16:42:58 -04:00
2014-04-16 14:23:16 -04:00
if ( ! ( pdf ) )
return ;
2018-07-20 22:28:48 -04:00
if ( ! ( SCAN_COLLECT_METADATA ) )
2014-07-01 10:50:08 -04:00
return ;
2014-06-17 16:42:58 -04:00
if ( ! ( pdf - > ctx - > wrkproperty ) )
return ;
pdfobj = cli_jsonobj ( pdf - > ctx - > wrkproperty , " PDFStats " ) ;
if ( ! ( pdfobj ) )
return ;
jbig2arr = cli_jsonarray ( pdfobj , " JavascriptObjects " ) ;
if ( ! ( jbig2arr ) )
return ;
2018-12-03 12:40:13 -05:00
cli_jsonint_array ( jbig2arr , obj - > id > > 8 ) ;
2014-06-17 16:42:58 -04:00
2014-04-16 14:23:16 -04:00
pdf - > stats . njs + + ;
}
2014-07-08 19:53:41 -04:00
# endif
2014-04-16 14:23:16 -04:00
2014-07-08 19:53:41 -04:00
# if HAVE_JSON
2014-06-25 14:06:17 -04:00
static void OpenAction_cb ( struct pdf_struct * pdf , struct pdf_obj * obj , struct pdfname_action * act )
2014-04-16 14:23:16 -04:00
{
2014-07-10 18:11:49 -04:00
UNUSEDPARAM ( obj ) ;
UNUSEDPARAM ( act ) ;
2014-04-16 14:23:16 -04:00
if ( ! ( pdf ) )
return ;
pdf - > stats . nopenaction + + ;
}
2014-07-08 19:53:41 -04:00
# endif
2014-04-16 14:23:16 -04:00
2014-07-08 19:53:41 -04:00
# if HAVE_JSON
2014-06-25 14:06:17 -04:00
static void Launch_cb ( struct pdf_struct * pdf , struct pdf_obj * obj , struct pdfname_action * act )
2014-04-16 14:23:16 -04:00
{
2014-07-10 18:11:49 -04:00
UNUSEDPARAM ( obj ) ;
UNUSEDPARAM ( act ) ;
2014-04-16 14:23:16 -04:00
if ( ! ( pdf ) )
return ;
pdf - > stats . nlaunch + + ;
}
2014-07-08 19:53:41 -04:00
# endif
2014-04-16 14:23:16 -04:00
2014-07-08 19:53:41 -04:00
# if HAVE_JSON
2014-06-25 14:06:17 -04:00
static void Page_cb ( struct pdf_struct * pdf , struct pdf_obj * obj , struct pdfname_action * act )
2014-04-16 14:23:16 -04:00
{
2014-07-10 18:11:49 -04:00
UNUSEDPARAM ( obj ) ;
UNUSEDPARAM ( act ) ;
2014-04-16 14:23:16 -04:00
if ( ! ( pdf ) )
return ;
pdf - > stats . npage + + ;
}
2014-07-08 19:53:41 -04:00
# endif
2014-04-16 14:23:16 -04:00
2014-07-08 19:53:41 -04:00
# if HAVE_JSON
2014-06-25 14:06:17 -04:00
static void Author_cb ( struct pdf_struct * pdf , struct pdf_obj * obj , struct pdfname_action * act )
2014-05-23 14:06:35 -04:00
{
2018-07-20 22:28:48 -04:00
cli_ctx * ctx = pdf - > ctx ;
2014-07-10 18:11:49 -04:00
UNUSEDPARAM ( act ) ;
2014-05-23 14:06:35 -04:00
if ( ! ( pdf ) )
return ;
2018-07-20 22:28:48 -04:00
if ( ! ( SCAN_COLLECT_METADATA ) )
2014-07-01 10:50:08 -04:00
return ;
2015-04-01 17:41:59 -04:00
if ( ! ( pdf - > stats . author ) ) {
2018-08-14 14:00:31 -07:00
const char * objstart = ( obj - > objstm ) ? ( const char * ) ( obj - > start + obj - > objstm - > streambuf )
: ( const char * ) ( obj - > start + pdf - > map ) ;
2015-04-01 17:41:59 -04:00
pdf - > stats . author = cli_calloc ( 1 , sizeof ( struct pdf_stats_entry ) ) ;
if ( ! ( pdf - > stats . author ) )
return ;
2019-03-05 21:15:41 -05:00
pdf - > stats . author - > data = pdf_parse_string ( pdf , obj , objstart , obj - > size , " /Author " , NULL , & ( pdf - > stats . author - > meta ) ) ;
2015-04-01 17:41:59 -04:00
}
2014-05-23 14:06:35 -04:00
}
2014-07-08 19:53:41 -04:00
# endif
2014-05-23 14:06:35 -04:00
2014-07-08 19:53:41 -04:00
# if HAVE_JSON
2014-06-25 14:06:17 -04:00
static void Creator_cb ( struct pdf_struct * pdf , struct pdf_obj * obj , struct pdfname_action * act )
2014-05-23 14:06:35 -04:00
{
2018-07-20 22:28:48 -04:00
cli_ctx * ctx = pdf - > ctx ;
2014-07-10 18:11:49 -04:00
UNUSEDPARAM ( act ) ;
2014-05-23 14:06:35 -04:00
if ( ! ( pdf ) )
return ;
2018-07-20 22:28:48 -04:00
if ( ! ( SCAN_COLLECT_METADATA ) )
2014-07-01 10:50:08 -04:00
return ;
2015-04-01 17:41:59 -04:00
if ( ! ( pdf - > stats . creator ) ) {
2018-08-14 14:00:31 -07:00
const char * objstart = ( obj - > objstm ) ? ( const char * ) ( obj - > start + obj - > objstm - > streambuf )
: ( const char * ) ( obj - > start + pdf - > map ) ;
2015-04-01 17:41:59 -04:00
pdf - > stats . creator = cli_calloc ( 1 , sizeof ( struct pdf_stats_entry ) ) ;
if ( ! ( pdf - > stats . creator ) )
return ;
2019-03-05 21:15:41 -05:00
pdf - > stats . creator - > data = pdf_parse_string ( pdf , obj , objstart , obj - > size , " /Creator " , NULL , & ( pdf - > stats . creator - > meta ) ) ;
2015-04-01 17:41:59 -04:00
}
2014-05-23 14:06:35 -04:00
}
2014-07-08 19:53:41 -04:00
# endif
2014-05-23 14:06:35 -04:00
2014-07-08 19:53:41 -04:00
# if HAVE_JSON
2014-06-25 14:06:17 -04:00
static void ModificationDate_cb ( struct pdf_struct * pdf , struct pdf_obj * obj , struct pdfname_action * act )
2014-05-23 14:06:35 -04:00
{
2018-07-20 22:28:48 -04:00
cli_ctx * ctx = pdf - > ctx ;
2014-07-10 18:11:49 -04:00
UNUSEDPARAM ( act ) ;
2014-05-23 14:06:35 -04:00
if ( ! ( pdf ) )
return ;
2018-07-20 22:28:48 -04:00
if ( ! ( SCAN_COLLECT_METADATA ) )
2014-07-01 10:50:08 -04:00
return ;
2015-04-01 17:41:59 -04:00
if ( ! ( pdf - > stats . modificationdate ) ) {
2018-08-14 14:00:31 -07:00
const char * objstart = ( obj - > objstm ) ? ( const char * ) ( obj - > start + obj - > objstm - > streambuf )
: ( const char * ) ( obj - > start + pdf - > map ) ;
2015-04-01 17:41:59 -04:00
pdf - > stats . modificationdate = cli_calloc ( 1 , sizeof ( struct pdf_stats_entry ) ) ;
if ( ! ( pdf - > stats . modificationdate ) )
return ;
2019-03-05 21:15:41 -05:00
pdf - > stats . modificationdate - > data = pdf_parse_string ( pdf , obj , objstart , obj - > size , " /ModDate " , NULL , & ( pdf - > stats . modificationdate - > meta ) ) ;
2015-04-01 17:41:59 -04:00
}
2014-05-23 14:06:35 -04:00
}
2014-07-08 19:53:41 -04:00
# endif
2014-05-23 14:06:35 -04:00
2014-07-08 19:53:41 -04:00
# if HAVE_JSON
2014-06-25 14:06:17 -04:00
static void CreationDate_cb ( struct pdf_struct * pdf , struct pdf_obj * obj , struct pdfname_action * act )
2014-05-23 14:06:35 -04:00
{
2018-07-20 22:28:48 -04:00
cli_ctx * ctx = pdf - > ctx ;
2014-07-10 18:11:49 -04:00
UNUSEDPARAM ( act ) ;
2014-05-23 14:06:35 -04:00
if ( ! ( pdf ) )
return ;
2018-07-20 22:28:48 -04:00
if ( ! ( SCAN_COLLECT_METADATA ) )
2014-07-01 10:50:08 -04:00
return ;
2015-04-01 17:41:59 -04:00
if ( ! ( pdf - > stats . creationdate ) ) {
2018-08-14 14:00:31 -07:00
const char * objstart = ( obj - > objstm ) ? ( const char * ) ( obj - > start + obj - > objstm - > streambuf )
: ( const char * ) ( obj - > start + pdf - > map ) ;
2015-04-01 17:41:59 -04:00
pdf - > stats . creationdate = cli_calloc ( 1 , sizeof ( struct pdf_stats_entry ) ) ;
if ( ! ( pdf - > stats . creationdate ) )
return ;
2019-03-05 21:15:41 -05:00
pdf - > stats . creationdate - > data = pdf_parse_string ( pdf , obj , objstart , obj - > size , " /CreationDate " , NULL , & ( pdf - > stats . creationdate - > meta ) ) ;
2015-04-01 17:41:59 -04:00
}
2014-05-23 14:06:35 -04:00
}
2014-07-08 19:53:41 -04:00
# endif
2014-05-23 14:06:35 -04:00
2014-07-08 19:53:41 -04:00
# if HAVE_JSON
2014-06-25 14:06:17 -04:00
static void Producer_cb ( struct pdf_struct * pdf , struct pdf_obj * obj , struct pdfname_action * act )
2014-05-23 14:06:35 -04:00
{
2018-07-20 22:28:48 -04:00
cli_ctx * ctx = pdf - > ctx ;
2014-07-10 18:11:49 -04:00
UNUSEDPARAM ( act ) ;
2014-05-23 14:06:35 -04:00
if ( ! ( pdf ) )
return ;
2018-07-20 22:28:48 -04:00
if ( ! ( SCAN_COLLECT_METADATA ) )
2014-07-01 10:50:08 -04:00
return ;
2015-04-01 17:41:59 -04:00
if ( ! ( pdf - > stats . producer ) ) {
2018-08-14 14:00:31 -07:00
const char * objstart = ( obj - > objstm ) ? ( const char * ) ( obj - > start + obj - > objstm - > streambuf )
: ( const char * ) ( obj - > start + pdf - > map ) ;
2015-04-01 17:41:59 -04:00
pdf - > stats . producer = cli_calloc ( 1 , sizeof ( struct pdf_stats_entry ) ) ;
if ( ! ( pdf - > stats . producer ) )
return ;
2019-03-05 21:15:41 -05:00
pdf - > stats . producer - > data = pdf_parse_string ( pdf , obj , objstart , obj - > size , " /Producer " , NULL , & ( pdf - > stats . producer - > meta ) ) ;
2015-04-01 17:41:59 -04:00
}
2014-05-23 14:06:35 -04:00
}
2014-07-08 19:53:41 -04:00
# endif
2014-05-23 14:06:35 -04:00
2014-07-08 19:53:41 -04:00
# if HAVE_JSON
2014-06-25 14:06:17 -04:00
static void Title_cb ( struct pdf_struct * pdf , struct pdf_obj * obj , struct pdfname_action * act )
2014-06-13 11:18:07 -04:00
{
2018-07-20 22:28:48 -04:00
cli_ctx * ctx = pdf - > ctx ;
2014-07-10 18:11:49 -04:00
UNUSEDPARAM ( act ) ;
2014-06-13 11:18:07 -04:00
if ( ! ( pdf ) )
return ;
2018-07-20 22:28:48 -04:00
if ( ! ( SCAN_COLLECT_METADATA ) )
2014-07-01 10:50:08 -04:00
return ;
2015-04-01 17:41:59 -04:00
if ( ! ( pdf - > stats . title ) ) {
2018-08-14 14:00:31 -07:00
const char * objstart = ( obj - > objstm ) ? ( const char * ) ( obj - > start + obj - > objstm - > streambuf )
: ( const char * ) ( obj - > start + pdf - > map ) ;
2015-04-01 17:41:59 -04:00
pdf - > stats . title = cli_calloc ( 1 , sizeof ( struct pdf_stats_entry ) ) ;
if ( ! ( pdf - > stats . title ) )
return ;
2019-03-05 21:15:41 -05:00
pdf - > stats . title - > data = pdf_parse_string ( pdf , obj , objstart , obj - > size , " /Title " , NULL , & ( pdf - > stats . title - > meta ) ) ;
2015-04-01 17:41:59 -04:00
}
2014-06-13 11:18:07 -04:00
}
2014-07-08 19:53:41 -04:00
# endif
2014-06-13 11:18:07 -04:00
2014-07-08 19:53:41 -04:00
# if HAVE_JSON
2014-06-25 14:06:17 -04:00
static void Keywords_cb ( struct pdf_struct * pdf , struct pdf_obj * obj , struct pdfname_action * act )
2014-06-13 11:18:07 -04:00
{
2018-07-20 22:28:48 -04:00
cli_ctx * ctx = pdf - > ctx ;
2014-07-10 18:11:49 -04:00
UNUSEDPARAM ( act ) ;
2014-06-13 11:18:07 -04:00
if ( ! ( pdf ) )
return ;
2018-07-20 22:28:48 -04:00
if ( ! ( SCAN_COLLECT_METADATA ) )
2014-07-01 10:50:08 -04:00
return ;
2015-04-01 17:41:59 -04:00
if ( ! ( pdf - > stats . keywords ) ) {
2018-08-14 14:00:31 -07:00
const char * objstart = ( obj - > objstm ) ? ( const char * ) ( obj - > start + obj - > objstm - > streambuf )
: ( const char * ) ( obj - > start + pdf - > map ) ;
2015-04-01 17:41:59 -04:00
pdf - > stats . keywords = cli_calloc ( 1 , sizeof ( struct pdf_stats_entry ) ) ;
if ( ! ( pdf - > stats . keywords ) )
return ;
2019-03-05 21:15:41 -05:00
pdf - > stats . keywords - > data = pdf_parse_string ( pdf , obj , objstart , obj - > size , " /Keywords " , NULL , & ( pdf - > stats . keywords - > meta ) ) ;
2015-04-01 17:41:59 -04:00
}
2014-06-13 11:18:07 -04:00
}
2014-07-08 19:53:41 -04:00
# endif
2014-06-13 11:18:07 -04:00
2014-07-08 19:53:41 -04:00
# if HAVE_JSON
2014-06-25 14:06:17 -04:00
static void Subject_cb ( struct pdf_struct * pdf , struct pdf_obj * obj , struct pdfname_action * act )
2014-06-13 11:18:07 -04:00
{
2018-07-20 22:28:48 -04:00
cli_ctx * ctx = pdf - > ctx ;
2014-07-10 18:11:49 -04:00
UNUSEDPARAM ( act ) ;
2014-06-13 11:18:07 -04:00
if ( ! ( pdf ) )
return ;
2018-07-20 22:28:48 -04:00
if ( ! ( SCAN_COLLECT_METADATA ) )
2014-07-01 10:50:08 -04:00
return ;
2015-04-01 17:41:59 -04:00
if ( ! ( pdf - > stats . subject ) ) {
2018-08-14 14:00:31 -07:00
const char * objstart = ( obj - > objstm ) ? ( const char * ) ( obj - > start + obj - > objstm - > streambuf )
: ( const char * ) ( obj - > start + pdf - > map ) ;
2015-04-01 17:41:59 -04:00
pdf - > stats . subject = cli_calloc ( 1 , sizeof ( struct pdf_stats_entry ) ) ;
if ( ! ( pdf - > stats . subject ) )
return ;
2019-03-05 21:15:41 -05:00
pdf - > stats . subject - > data = pdf_parse_string ( pdf , obj , objstart , obj - > size , " /Subject " , NULL , & ( pdf - > stats . subject - > meta ) ) ;
2015-04-01 17:41:59 -04:00
}
2014-06-13 11:18:07 -04:00
}
2014-07-08 19:53:41 -04:00
# endif
2014-06-13 11:18:07 -04:00
2014-07-08 19:53:41 -04:00
# if HAVE_JSON
2014-06-27 12:43:23 -04:00
static void RichMedia_cb ( struct pdf_struct * pdf , struct pdf_obj * obj , struct pdfname_action * act )
{
2014-07-10 18:11:49 -04:00
UNUSEDPARAM ( obj ) ;
UNUSEDPARAM ( act ) ;
2014-06-27 12:43:23 -04:00
if ( ! ( pdf ) )
return ;
pdf - > stats . nrichmedia + + ;
}
2014-07-08 19:53:41 -04:00
# endif
2014-06-27 12:43:23 -04:00
2014-07-08 19:53:41 -04:00
# if HAVE_JSON
2014-06-27 12:43:23 -04:00
static void AcroForm_cb ( struct pdf_struct * pdf , struct pdf_obj * obj , struct pdfname_action * act )
{
2014-07-10 18:11:49 -04:00
UNUSEDPARAM ( obj ) ;
UNUSEDPARAM ( act ) ;
2014-06-27 12:43:23 -04:00
if ( ! ( pdf ) )
return ;
pdf - > stats . nacroform + + ;
}
2014-07-08 19:53:41 -04:00
# endif
2014-06-27 12:43:23 -04:00
2014-07-08 19:53:41 -04:00
# if HAVE_JSON
2014-06-27 12:43:23 -04:00
static void XFA_cb ( struct pdf_struct * pdf , struct pdf_obj * obj , struct pdfname_action * act )
{
2014-07-10 18:11:49 -04:00
UNUSEDPARAM ( obj ) ;
UNUSEDPARAM ( act ) ;
2014-06-27 12:43:23 -04:00
if ( ! ( pdf ) )
return ;
pdf - > stats . nxfa + + ;
}
2014-07-08 19:53:41 -04:00
# endif
2014-06-27 12:43:23 -04:00
2014-07-08 19:53:41 -04:00
# if HAVE_JSON
2014-06-25 14:06:17 -04:00
static void Pages_cb ( struct pdf_struct * pdf , struct pdf_obj * obj , struct pdfname_action * act )
2014-06-19 17:41:15 -04:00
{
2018-07-20 22:28:48 -04:00
cli_ctx * ctx = pdf - > ctx ;
2014-06-23 17:41:28 -04:00
struct pdf_array * array ;
2018-08-14 14:00:31 -07:00
const char * objstart = ( obj - > objstm ) ? ( const char * ) ( obj - > start + obj - > objstm - > streambuf )
: ( const char * ) ( obj - > start + pdf - > map ) ;
2014-06-23 17:41:28 -04:00
const char * begin ;
2018-12-03 12:40:13 -05:00
unsigned long npages = 0 , count ;
2019-01-22 14:15:46 -05:00
long temp_long ;
2014-06-23 17:41:28 -04:00
struct pdf_array_node * node ;
json_object * pdfobj ;
2018-10-25 13:06:15 -07:00
size_t countsize = 0 ;
2014-06-23 17:41:28 -04:00
2014-07-10 18:11:49 -04:00
UNUSEDPARAM ( act ) ;
2014-06-23 17:41:28 -04:00
if ( ! ( pdf ) | | ! ( pdf - > ctx - > wrkproperty ) )
return ;
2018-07-20 22:28:48 -04:00
if ( ! ( SCAN_COLLECT_METADATA ) )
2014-07-01 10:50:08 -04:00
return ;
2014-06-23 17:41:28 -04:00
pdfobj = cli_jsonobj ( pdf - > ctx - > wrkproperty , " PDFStats " ) ;
if ( ! ( pdfobj ) )
return ;
2019-03-05 21:15:41 -05:00
begin = cli_memstr ( objstart , obj - > size , " /Kids " , 5 ) ;
2014-06-23 17:41:28 -04:00
if ( ! ( begin ) )
return ;
begin + = 5 ;
2019-03-05 21:15:41 -05:00
array = pdf_parse_array ( pdf , obj , obj - > size , ( char * ) begin , NULL ) ;
2014-06-24 18:43:51 -04:00
if ( ! ( array ) ) {
cli_jsonbool ( pdfobj , " IncorrectPagesCount " , 1 ) ;
2014-06-24 10:43:15 -04:00
return ;
2014-06-24 18:43:51 -04:00
}
2014-06-24 10:43:15 -04:00
for ( node = array - > nodes ; node ! = NULL ; node = node - > next )
if ( node - > datasz )
2014-06-24 18:43:51 -04:00
if ( strchr ( ( char * ) ( node - > data ) , ' R ' ) )
2014-06-24 10:43:15 -04:00
npages + + ;
2014-06-23 17:41:28 -04:00
2019-03-05 21:15:41 -05:00
begin = cli_memstr ( objstart , obj - > size , " /Count " , 6 ) ;
2014-06-23 17:41:28 -04:00
if ( ! ( begin ) ) {
cli_jsonbool ( pdfobj , " IncorrectPagesCount " , 1 ) ;
goto cleanup ;
}
begin + = 6 ;
2019-03-05 21:15:41 -05:00
while ( ( ( size_t ) ( begin - objstart ) < obj - > size ) & & isspace ( begin [ 0 ] ) )
2014-06-23 17:41:28 -04:00
begin + + ;
2019-03-05 21:15:41 -05:00
if ( ( size_t ) ( begin - objstart ) > = obj - > size ) {
2014-06-23 17:41:28 -04:00
goto cleanup ;
}
2019-03-05 21:15:41 -05:00
countsize = ( obj - > objstm ) ? ( size_t ) ( obj - > start + obj - > objstm - > streambuf + obj - > size - begin )
: ( size_t ) ( obj - > start + pdf - > map + obj - > size - begin ) ;
2018-10-25 13:06:15 -07:00
2019-01-22 14:15:46 -05:00
if ( CL_SUCCESS ! = cli_strntol_wrap ( begin , countsize , 0 , 10 , & temp_long ) ) {
cli_jsonbool ( pdfobj , " IncorrectPagesCount " , 1 ) ;
} else if ( temp_long < 0 ) {
2014-06-23 17:41:28 -04:00
cli_jsonbool ( pdfobj , " IncorrectPagesCount " , 1 ) ;
2019-01-22 14:15:46 -05:00
} else {
count = ( unsigned long ) temp_long ;
if ( count ! = npages ) {
cli_jsonbool ( pdfobj , " IncorrectPagesCount " , 1 ) ;
}
2018-06-02 20:58:35 -04:00
}
2014-06-23 17:41:28 -04:00
cleanup :
pdf_free_array ( array ) ;
2014-06-19 17:41:15 -04:00
}
2014-07-08 19:53:41 -04:00
# endif
2014-06-19 17:41:15 -04:00
2014-07-08 19:53:41 -04:00
# if HAVE_JSON
2014-06-25 14:06:17 -04:00
static void Colors_cb ( struct pdf_struct * pdf , struct pdf_obj * obj , struct pdfname_action * act )
2014-06-19 17:41:15 -04:00
{
2018-07-20 22:28:48 -04:00
cli_ctx * ctx = pdf - > ctx ;
2014-06-19 17:41:15 -04:00
json_object * colorsobj , * pdfobj ;
unsigned long ncolors ;
2019-01-22 14:15:46 -05:00
long temp_long ;
2018-08-14 14:00:31 -07:00
char * p1 ;
const char * objstart = ( obj - > objstm ) ? ( const char * ) ( obj - > start + obj - > objstm - > streambuf )
: ( const char * ) ( obj - > start + pdf - > map ) ;
2014-07-10 18:11:49 -04:00
UNUSEDPARAM ( act ) ;
2014-06-19 17:41:15 -04:00
if ( ! ( pdf ) | | ! ( pdf - > ctx ) | | ! ( pdf - > ctx - > wrkproperty ) )
return ;
2018-07-20 22:28:48 -04:00
if ( ! ( SCAN_COLLECT_METADATA ) )
2014-07-01 10:50:08 -04:00
return ;
2019-03-05 21:15:41 -05:00
p1 = ( char * ) cli_memstr ( objstart , obj - > size , " /Colors " , 7 ) ;
2014-06-19 17:41:15 -04:00
if ( ! ( p1 ) )
return ;
p1 + = 7 ;
/* Ensure that we have at least one whitespace character plus at least one number */
2019-03-05 21:15:41 -05:00
if ( obj - > size - ( size_t ) ( p1 - objstart ) < 2 )
2014-06-19 17:41:15 -04:00
return ;
2019-03-05 21:15:41 -05:00
while ( ( ( size_t ) ( p1 - objstart ) < obj - > size ) & & isspace ( p1 [ 0 ] ) )
2014-06-19 17:41:15 -04:00
p1 + + ;
2019-03-05 21:15:41 -05:00
if ( ( size_t ) ( p1 - objstart ) = = obj - > size )
2014-06-19 17:41:15 -04:00
return ;
2019-03-05 21:15:41 -05:00
if ( CL_SUCCESS ! = cli_strntol_wrap ( p1 , ( size_t ) ( ( p1 - objstart ) - obj - > size ) , 0 , 10 , & temp_long ) ) {
2019-01-22 14:15:46 -05:00
return ;
} else if ( temp_long < 0 ) {
2018-06-02 20:58:35 -04:00
return ;
2019-01-22 14:15:46 -05:00
}
ncolors = ( unsigned long ) temp_long ;
2014-06-19 17:41:15 -04:00
/* We only care if the number of colors > 2**24 */
2018-12-03 12:40:13 -05:00
if ( ncolors < 1 < < 24 )
2014-06-19 17:41:15 -04:00
return ;
pdfobj = cli_jsonobj ( pdf - > ctx - > wrkproperty , " PDFStats " ) ;
if ( ! ( pdfobj ) )
return ;
colorsobj = cli_jsonarray ( pdfobj , " BigColors " ) ;
if ( ! ( colorsobj ) )
return ;
2018-12-03 12:40:13 -05:00
cli_jsonint_array ( colorsobj , obj - > id > > 8 ) ;
2014-06-19 17:41:15 -04:00
}
2014-07-08 19:53:41 -04:00
# endif
2014-06-19 17:41:15 -04:00
2014-07-08 19:53:41 -04:00
# if HAVE_JSON
2014-04-29 17:27:02 -04:00
static void pdf_export_json ( struct pdf_struct * pdf )
{
2018-07-20 22:28:48 -04:00
cli_ctx * ctx = pdf - > ctx ;
2014-04-29 17:27:02 -04:00
json_object * pdfobj ;
2014-06-25 16:26:33 -04:00
unsigned long i ;
2014-04-29 17:27:02 -04:00
if ( ! ( pdf ) )
return ;
2014-06-10 13:52:15 -04:00
if ( ! ( pdf - > ctx ) ) {
goto cleanup ;
}
2014-04-29 17:27:02 -04:00
2018-07-20 22:28:48 -04:00
if ( ! ( SCAN_COLLECT_METADATA ) | | ! ( pdf - > ctx - > wrkproperty ) ) {
2014-06-10 13:52:15 -04:00
goto cleanup ;
}
2014-04-29 17:27:02 -04:00
2014-06-13 20:40:46 -04:00
pdfobj = cli_jsonobj ( pdf - > ctx - > wrkproperty , " PDFStats " ) ;
2014-06-10 13:52:15 -04:00
if ( ! ( pdfobj ) ) {
goto cleanup ;
}
2014-04-29 17:27:02 -04:00
2015-03-02 19:06:23 -05:00
if ( pdf - > stats . author ) {
2015-04-01 17:41:59 -04:00
if ( ! pdf - > stats . author - > meta . success ) {
char * out = pdf_finalize_string ( pdf , pdf - > stats . author - > meta . obj , pdf - > stats . author - > data , pdf - > stats . author - > meta . length ) ;
if ( out ) {
free ( pdf - > stats . author - > data ) ;
2018-12-03 12:40:13 -05:00
pdf - > stats . author - > data = out ;
pdf - > stats . author - > meta . length = strlen ( out ) ;
2015-04-01 17:41:59 -04:00
pdf - > stats . author - > meta . success = 1 ;
2015-03-20 16:36:41 -04:00
}
2015-03-02 19:06:23 -05:00
}
2015-04-01 17:41:59 -04:00
if ( pdf - > stats . author - > meta . success & & cli_isutf8 ( pdf - > stats . author - > data , pdf - > stats . author - > meta . length ) ) {
cli_jsonstr ( pdfobj , " Author " , pdf - > stats . author - > data ) ;
2015-04-08 11:09:52 -04:00
} else if ( pdf - > stats . author - > data & & pdf - > stats . author - > meta . length ) {
2015-04-14 15:53:17 -04:00
char * b64 = cl_base64_encode ( pdf - > stats . author - > data , pdf - > stats . author - > meta . length ) ;
2015-04-01 17:41:59 -04:00
cli_jsonstr ( pdfobj , " Author " , b64 ) ;
cli_jsonbool ( pdfobj , " Author_base64 " , 1 ) ;
free ( b64 ) ;
2015-04-14 15:53:17 -04:00
} else {
cli_jsonstr ( pdfobj , " Author " , " " ) ;
2015-04-01 17:41:59 -04:00
}
2015-03-02 19:06:23 -05:00
}
if ( pdf - > stats . creator ) {
2015-04-01 17:41:59 -04:00
if ( ! pdf - > stats . creator - > meta . success ) {
char * out = pdf_finalize_string ( pdf , pdf - > stats . creator - > meta . obj , pdf - > stats . creator - > data , pdf - > stats . creator - > meta . length ) ;
if ( out ) {
free ( pdf - > stats . creator - > data ) ;
2018-12-03 12:40:13 -05:00
pdf - > stats . creator - > data = out ;
pdf - > stats . creator - > meta . length = strlen ( out ) ;
2015-04-01 17:41:59 -04:00
pdf - > stats . creator - > meta . success = 1 ;
2015-03-20 16:36:41 -04:00
}
2015-03-02 19:06:23 -05:00
}
2015-04-01 17:41:59 -04:00
if ( pdf - > stats . creator - > meta . success & & cli_isutf8 ( pdf - > stats . creator - > data , pdf - > stats . creator - > meta . length ) ) {
cli_jsonstr ( pdfobj , " Creator " , pdf - > stats . creator - > data ) ;
2015-04-08 11:09:52 -04:00
} else if ( pdf - > stats . creator - > data & & pdf - > stats . creator - > meta . length ) {
2015-04-14 15:53:17 -04:00
char * b64 = cl_base64_encode ( pdf - > stats . creator - > data , pdf - > stats . creator - > meta . length ) ;
2015-04-01 17:41:59 -04:00
cli_jsonstr ( pdfobj , " Creator " , b64 ) ;
cli_jsonbool ( pdfobj , " Creator_base64 " , 1 ) ;
free ( b64 ) ;
2015-04-14 15:53:17 -04:00
} else {
cli_jsonstr ( pdfobj , " Creator " , " " ) ;
2015-04-01 17:41:59 -04:00
}
2015-03-02 19:06:23 -05:00
}
if ( pdf - > stats . producer ) {
2015-04-01 17:41:59 -04:00
if ( ! pdf - > stats . producer - > meta . success ) {
char * out = pdf_finalize_string ( pdf , pdf - > stats . producer - > meta . obj , pdf - > stats . producer - > data , pdf - > stats . producer - > meta . length ) ;
if ( out ) {
free ( pdf - > stats . producer - > data ) ;
2018-12-03 12:40:13 -05:00
pdf - > stats . producer - > data = out ;
pdf - > stats . producer - > meta . length = strlen ( out ) ;
2015-04-01 17:41:59 -04:00
pdf - > stats . producer - > meta . success = 1 ;
2015-03-20 16:36:41 -04:00
}
2015-03-02 19:06:23 -05:00
}
2015-04-01 17:41:59 -04:00
if ( pdf - > stats . producer - > meta . success & & cli_isutf8 ( pdf - > stats . producer - > data , pdf - > stats . producer - > meta . length ) ) {
cli_jsonstr ( pdfobj , " Producer " , pdf - > stats . producer - > data ) ;
2015-04-08 11:09:52 -04:00
} else if ( pdf - > stats . producer - > data & & pdf - > stats . producer - > meta . length ) {
2015-04-14 15:53:17 -04:00
char * b64 = cl_base64_encode ( pdf - > stats . producer - > data , pdf - > stats . producer - > meta . length ) ;
2015-04-01 17:41:59 -04:00
cli_jsonstr ( pdfobj , " Producer " , b64 ) ;
cli_jsonbool ( pdfobj , " Producer_base64 " , 1 ) ;
free ( b64 ) ;
2015-04-14 15:53:17 -04:00
} else {
cli_jsonstr ( pdfobj , " Producer " , " " ) ;
2015-04-01 17:41:59 -04:00
}
2015-03-02 19:06:23 -05:00
}
if ( pdf - > stats . modificationdate ) {
2015-04-01 17:41:59 -04:00
if ( ! pdf - > stats . modificationdate - > meta . success ) {
char * out = pdf_finalize_string ( pdf , pdf - > stats . modificationdate - > meta . obj , pdf - > stats . modificationdate - > data , pdf - > stats . modificationdate - > meta . length ) ;
if ( out ) {
free ( pdf - > stats . modificationdate - > data ) ;
2018-12-03 12:40:13 -05:00
pdf - > stats . modificationdate - > data = out ;
pdf - > stats . modificationdate - > meta . length = strlen ( out ) ;
2015-04-01 17:41:59 -04:00
pdf - > stats . modificationdate - > meta . success = 1 ;
2015-03-20 16:36:41 -04:00
}
2015-03-02 19:06:23 -05:00
}
2015-04-01 17:41:59 -04:00
if ( pdf - > stats . modificationdate - > meta . success & & cli_isutf8 ( pdf - > stats . modificationdate - > data , pdf - > stats . modificationdate - > meta . length ) ) {
cli_jsonstr ( pdfobj , " ModificationDate " , pdf - > stats . modificationdate - > data ) ;
2015-04-08 11:09:52 -04:00
} else if ( pdf - > stats . modificationdate - > data & & pdf - > stats . modificationdate - > meta . length ) {
2015-04-14 15:53:17 -04:00
char * b64 = cl_base64_encode ( pdf - > stats . modificationdate - > data , pdf - > stats . modificationdate - > meta . length ) ;
2015-04-01 17:41:59 -04:00
cli_jsonstr ( pdfobj , " ModificationDate " , b64 ) ;
cli_jsonbool ( pdfobj , " ModificationDate_base64 " , 1 ) ;
free ( b64 ) ;
2015-04-14 15:53:17 -04:00
} else {
cli_jsonstr ( pdfobj , " ModificationDate " , " " ) ;
2015-04-01 17:41:59 -04:00
}
2015-03-02 19:06:23 -05:00
}
if ( pdf - > stats . creationdate ) {
2015-04-01 17:41:59 -04:00
if ( ! pdf - > stats . creationdate - > meta . success ) {
char * out = pdf_finalize_string ( pdf , pdf - > stats . creationdate - > meta . obj , pdf - > stats . creationdate - > data , pdf - > stats . creationdate - > meta . length ) ;
if ( out ) {
free ( pdf - > stats . creationdate - > data ) ;
2018-12-03 12:40:13 -05:00
pdf - > stats . creationdate - > data = out ;
pdf - > stats . creationdate - > meta . length = strlen ( out ) ;
2015-04-01 17:41:59 -04:00
pdf - > stats . creationdate - > meta . success = 1 ;
2015-03-20 16:36:41 -04:00
}
2015-03-02 19:06:23 -05:00
}
2015-04-01 17:41:59 -04:00
if ( pdf - > stats . creationdate - > meta . success & & cli_isutf8 ( pdf - > stats . creationdate - > data , pdf - > stats . creationdate - > meta . length ) ) {
cli_jsonstr ( pdfobj , " CreationDate " , pdf - > stats . creationdate - > data ) ;
2015-04-08 11:09:52 -04:00
} else if ( pdf - > stats . creationdate - > data & & pdf - > stats . creationdate - > meta . length ) {
2015-04-14 15:53:17 -04:00
char * b64 = cl_base64_encode ( pdf - > stats . creationdate - > data , pdf - > stats . creationdate - > meta . length ) ;
2015-04-01 17:41:59 -04:00
cli_jsonstr ( pdfobj , " CreationDate " , b64 ) ;
cli_jsonbool ( pdfobj , " CreationDate_base64 " , 1 ) ;
free ( b64 ) ;
2015-04-14 15:53:17 -04:00
} else {
cli_jsonstr ( pdfobj , " CreationDate " , " " ) ;
2015-04-01 17:41:59 -04:00
}
2015-03-02 19:06:23 -05:00
}
if ( pdf - > stats . title ) {
2015-04-01 17:41:59 -04:00
if ( ! pdf - > stats . title - > meta . success ) {
char * out = pdf_finalize_string ( pdf , pdf - > stats . title - > meta . obj , pdf - > stats . title - > data , pdf - > stats . title - > meta . length ) ;
if ( out ) {
free ( pdf - > stats . title - > data ) ;
2018-12-03 12:40:13 -05:00
pdf - > stats . title - > data = out ;
pdf - > stats . title - > meta . length = strlen ( out ) ;
2015-04-01 17:41:59 -04:00
pdf - > stats . title - > meta . success = 1 ;
2015-03-20 16:36:41 -04:00
}
2015-03-02 19:06:23 -05:00
}
2015-04-01 17:41:59 -04:00
if ( pdf - > stats . title - > meta . success & & cli_isutf8 ( pdf - > stats . title - > data , pdf - > stats . title - > meta . length ) ) {
cli_jsonstr ( pdfobj , " Title " , pdf - > stats . title - > data ) ;
2015-04-08 11:09:52 -04:00
} else if ( pdf - > stats . title - > data & & pdf - > stats . title - > meta . length ) {
2015-04-14 15:53:17 -04:00
char * b64 = cl_base64_encode ( pdf - > stats . title - > data , pdf - > stats . title - > meta . length ) ;
2015-04-01 17:41:59 -04:00
cli_jsonstr ( pdfobj , " Title " , b64 ) ;
cli_jsonbool ( pdfobj , " Title_base64 " , 1 ) ;
free ( b64 ) ;
2015-04-14 15:53:17 -04:00
} else {
cli_jsonstr ( pdfobj , " Title " , " " ) ;
2015-04-01 17:41:59 -04:00
}
2015-03-02 19:06:23 -05:00
}
if ( pdf - > stats . subject ) {
2015-04-01 17:41:59 -04:00
if ( ! pdf - > stats . subject - > meta . success ) {
char * out = pdf_finalize_string ( pdf , pdf - > stats . subject - > meta . obj , pdf - > stats . subject - > data , pdf - > stats . subject - > meta . length ) ;
if ( out ) {
free ( pdf - > stats . subject - > data ) ;
2018-12-03 12:40:13 -05:00
pdf - > stats . subject - > data = out ;
pdf - > stats . subject - > meta . length = strlen ( out ) ;
2015-04-01 17:41:59 -04:00
pdf - > stats . subject - > meta . success = 1 ;
2015-03-20 16:36:41 -04:00
}
2015-03-02 19:06:23 -05:00
}
2015-04-01 17:41:59 -04:00
if ( pdf - > stats . subject - > meta . success & & cli_isutf8 ( pdf - > stats . subject - > data , pdf - > stats . subject - > meta . length ) ) {
cli_jsonstr ( pdfobj , " Subject " , pdf - > stats . subject - > data ) ;
2015-04-08 11:09:52 -04:00
} else if ( pdf - > stats . subject - > data & & pdf - > stats . subject - > meta . length ) {
2015-04-14 15:53:17 -04:00
char * b64 = cl_base64_encode ( pdf - > stats . subject - > data , pdf - > stats . subject - > meta . length ) ;
2015-04-01 17:41:59 -04:00
cli_jsonstr ( pdfobj , " Subject " , b64 ) ;
cli_jsonbool ( pdfobj , " Subject_base64 " , 1 ) ;
free ( b64 ) ;
2015-04-14 15:53:17 -04:00
} else {
cli_jsonstr ( pdfobj , " Subject " , " " ) ;
2015-04-01 17:41:59 -04:00
}
2015-03-02 19:06:23 -05:00
}
if ( pdf - > stats . keywords ) {
2015-04-01 17:41:59 -04:00
if ( ! pdf - > stats . keywords - > meta . success ) {
char * out = pdf_finalize_string ( pdf , pdf - > stats . keywords - > meta . obj , pdf - > stats . keywords - > data , pdf - > stats . keywords - > meta . length ) ;
if ( out ) {
free ( pdf - > stats . keywords - > data ) ;
2018-12-03 12:40:13 -05:00
pdf - > stats . keywords - > data = out ;
pdf - > stats . keywords - > meta . length = strlen ( out ) ;
2015-04-01 17:41:59 -04:00
pdf - > stats . keywords - > meta . success = 1 ;
2015-03-20 16:36:41 -04:00
}
2015-03-02 19:06:23 -05:00
}
2015-04-01 17:41:59 -04:00
if ( pdf - > stats . keywords - > meta . success & & cli_isutf8 ( pdf - > stats . keywords - > data , pdf - > stats . keywords - > meta . length ) ) {
cli_jsonstr ( pdfobj , " Keywords " , pdf - > stats . keywords - > data ) ;
2015-04-08 11:09:52 -04:00
} else if ( pdf - > stats . keywords - > data & & pdf - > stats . keywords - > meta . length ) {
2015-04-14 15:53:17 -04:00
char * b64 = cl_base64_encode ( pdf - > stats . keywords - > data , pdf - > stats . keywords - > meta . length ) ;
2015-04-01 17:41:59 -04:00
cli_jsonstr ( pdfobj , " Keywords " , b64 ) ;
cli_jsonbool ( pdfobj , " Keywords_base64 " , 1 ) ;
free ( b64 ) ;
2015-04-14 15:53:17 -04:00
} else {
2015-04-14 16:02:37 -04:00
cli_jsonstr ( pdfobj , " Keywords " , " " ) ;
2015-04-01 17:41:59 -04:00
}
2015-03-02 19:06:23 -05:00
}
2014-04-29 17:27:02 -04:00
if ( pdf - > stats . ninvalidobjs )
cli_jsonint ( pdfobj , " InvalidObjectCount " , pdf - > stats . ninvalidobjs ) ;
if ( pdf - > stats . njs )
cli_jsonint ( pdfobj , " JavaScriptObjectCount " , pdf - > stats . njs ) ;
if ( pdf - > stats . nflate )
cli_jsonint ( pdfobj , " DeflateObjectCount " , pdf - > stats . nflate ) ;
if ( pdf - > stats . nactivex )
cli_jsonint ( pdfobj , " ActiveXObjectCount " , pdf - > stats . nactivex ) ;
if ( pdf - > stats . nflash )
cli_jsonint ( pdfobj , " FlashObjectCount " , pdf - > stats . nflash ) ;
if ( pdf - > stats . ncolors )
cli_jsonint ( pdfobj , " ColorCount " , pdf - > stats . ncolors ) ;
if ( pdf - > stats . nasciihexdecode )
cli_jsonint ( pdfobj , " AsciiHexDecodeObjectCount " , pdf - > stats . nasciihexdecode ) ;
if ( pdf - > stats . nascii85decode )
cli_jsonint ( pdfobj , " Ascii85DecodeObjectCount " , pdf - > stats . nascii85decode ) ;
if ( pdf - > stats . nembeddedfile )
cli_jsonint ( pdfobj , " EmbeddedFileCount " , pdf - > stats . nembeddedfile ) ;
if ( pdf - > stats . nimage )
cli_jsonint ( pdfobj , " ImageCount " , pdf - > stats . nimage ) ;
if ( pdf - > stats . nlzw )
cli_jsonint ( pdfobj , " LZWCount " , pdf - > stats . nlzw ) ;
if ( pdf - > stats . nrunlengthdecode )
cli_jsonint ( pdfobj , " RunLengthDecodeCount " , pdf - > stats . nrunlengthdecode ) ;
if ( pdf - > stats . nfaxdecode )
cli_jsonint ( pdfobj , " FaxDecodeCount " , pdf - > stats . nfaxdecode ) ;
if ( pdf - > stats . njbig2decode )
cli_jsonint ( pdfobj , " JBIG2DecodeCount " , pdf - > stats . njbig2decode ) ;
if ( pdf - > stats . ndctdecode )
cli_jsonint ( pdfobj , " DCTDecodeCount " , pdf - > stats . ndctdecode ) ;
if ( pdf - > stats . njpxdecode )
cli_jsonint ( pdfobj , " JPXDecodeCount " , pdf - > stats . njpxdecode ) ;
if ( pdf - > stats . ncrypt )
cli_jsonint ( pdfobj , " CryptCount " , pdf - > stats . ncrypt ) ;
if ( pdf - > stats . nstandard )
cli_jsonint ( pdfobj , " StandardCount " , pdf - > stats . nstandard ) ;
if ( pdf - > stats . nsigned )
cli_jsonint ( pdfobj , " SignedCount " , pdf - > stats . nsigned ) ;
if ( pdf - > stats . nopenaction )
cli_jsonint ( pdfobj , " OpenActionCount " , pdf - > stats . nopenaction ) ;
if ( pdf - > stats . nlaunch )
cli_jsonint ( pdfobj , " LaunchCount " , pdf - > stats . nlaunch ) ;
if ( pdf - > stats . npage )
cli_jsonint ( pdfobj , " PageCount " , pdf - > stats . npage ) ;
2014-06-27 12:43:23 -04:00
if ( pdf - > stats . nrichmedia )
cli_jsonint ( pdfobj , " RichMediaCount " , pdf - > stats . nrichmedia ) ;
if ( pdf - > stats . nacroform )
cli_jsonint ( pdfobj , " AcroFormCount " , pdf - > stats . nacroform ) ;
if ( pdf - > stats . nxfa )
cli_jsonint ( pdfobj , " XFACount " , pdf - > stats . nxfa ) ;
2014-04-30 16:38:55 -04:00
if ( pdf - > flags & ( 1 < < BAD_PDF_VERSION ) )
cli_jsonbool ( pdfobj , " BadVersion " , 1 ) ;
if ( pdf - > flags & ( 1 < < BAD_PDF_HEADERPOS ) )
cli_jsonbool ( pdfobj , " BadHeaderPosition " , 1 ) ;
if ( pdf - > flags & ( 1 < < BAD_PDF_TRAILER ) )
cli_jsonbool ( pdfobj , " BadTrailer " , 1 ) ;
if ( pdf - > flags & ( 1 < < BAD_PDF_TOOMANYOBJS ) )
cli_jsonbool ( pdfobj , " TooManyObjects " , 1 ) ;
if ( pdf - > flags & ( 1 < < ENCRYPTED_PDF ) ) {
cli_jsonbool ( pdfobj , " Encrypted " , 1 ) ;
if ( pdf - > flags & ( 1 < < DECRYPTABLE_PDF ) )
cli_jsonbool ( pdfobj , " Decryptable " , 1 ) ;
2015-03-20 16:44:14 -04:00
else
cli_jsonbool ( pdfobj , " Decryptable " , 0 ) ;
2014-04-30 16:38:55 -04:00
}
2014-06-10 13:52:15 -04:00
2018-12-03 12:40:13 -05:00
for ( i = 0 ; i < pdf - > nobjs ; i + + ) {
if ( pdf - > objs [ i ] - > flags & ( 1 < < OBJ_TRUNCATED ) ) {
2014-06-25 16:26:33 -04:00
json_object * truncobj ;
truncobj = cli_jsonarray ( pdfobj , " TruncatedObjects " ) ;
if ( ! ( truncobj ) )
continue ;
2018-08-14 14:00:31 -07:00
cli_jsonint_array ( truncobj , pdf - > objs [ i ] - > id > > 8 ) ;
2014-06-25 16:26:33 -04:00
}
}
2014-06-10 13:52:15 -04:00
cleanup :
if ( ( pdf - > stats . author ) ) {
2015-04-01 17:41:59 -04:00
if ( pdf - > stats . author - > data )
free ( pdf - > stats . author - > data ) ;
2014-06-10 13:52:15 -04:00
free ( pdf - > stats . author ) ;
pdf - > stats . author = NULL ;
}
if ( pdf - > stats . creator ) {
2015-04-01 17:41:59 -04:00
if ( pdf - > stats . creator - > data )
free ( pdf - > stats . creator - > data ) ;
2014-06-10 13:52:15 -04:00
free ( pdf - > stats . creator ) ;
pdf - > stats . creator = NULL ;
}
if ( pdf - > stats . producer ) {
2015-04-01 17:41:59 -04:00
if ( pdf - > stats . producer - > data )
free ( pdf - > stats . producer - > data ) ;
2014-06-10 13:52:15 -04:00
free ( pdf - > stats . producer ) ;
pdf - > stats . producer = NULL ;
}
if ( pdf - > stats . modificationdate ) {
2015-04-01 17:41:59 -04:00
if ( pdf - > stats . modificationdate - > data )
free ( pdf - > stats . modificationdate - > data ) ;
2014-06-10 13:52:15 -04:00
free ( pdf - > stats . modificationdate ) ;
pdf - > stats . modificationdate = NULL ;
}
if ( pdf - > stats . creationdate ) {
2015-04-01 17:41:59 -04:00
if ( pdf - > stats . creationdate - > data )
free ( pdf - > stats . creationdate - > data ) ;
2014-06-10 13:52:15 -04:00
free ( pdf - > stats . creationdate ) ;
pdf - > stats . creationdate = NULL ;
}
2014-06-13 11:18:07 -04:00
if ( pdf - > stats . title ) {
2015-04-01 17:41:59 -04:00
if ( pdf - > stats . title - > data )
free ( pdf - > stats . title - > data ) ;
2014-06-13 11:18:07 -04:00
free ( pdf - > stats . title ) ;
pdf - > stats . title = NULL ;
}
if ( pdf - > stats . subject ) {
2015-04-01 17:41:59 -04:00
if ( pdf - > stats . subject - > data )
free ( pdf - > stats . subject - > data ) ;
2014-06-13 11:18:07 -04:00
free ( pdf - > stats . subject ) ;
pdf - > stats . subject = NULL ;
}
if ( pdf - > stats . keywords ) {
2015-04-01 17:41:59 -04:00
if ( pdf - > stats . keywords - > data )
free ( pdf - > stats . keywords - > data ) ;
2014-06-13 11:18:07 -04:00
free ( pdf - > stats . keywords ) ;
pdf - > stats . keywords = NULL ;
}
2014-04-29 17:27:02 -04:00
}
2014-07-08 19:53:41 -04:00
# endif