mirror of
https://github.com/Cisco-Talos/clamav.git
synced 2025-10-19 10:23:17 +00:00
Merge pull request #27 from cisco-sbg/CLAM-2752-pdf-overflow-1.4
Fix integer overflow in PDF parser (1.4.3)
This commit is contained in:
commit
bca003b028
3 changed files with 87 additions and 51 deletions
|
@ -440,7 +440,7 @@ int pdf_findobj_in_objstm(struct pdf_struct *pdf, struct objstm_struct *objstm,
|
|||
|
||||
if (CL_SUCCESS != cli_strntol_wrap(index, bytes_remaining, 0, 10, &temp_long)) {
|
||||
/* Failed to find obj offset for next obj */
|
||||
cli_dbgmsg("pdf_findobj_in_objstm: Failed to find next obj offset for obj in object stream though there should be {%u} more.\n", objstm->n - objstm->nobjs_found);
|
||||
cli_dbgmsg("pdf_findobj_in_objstm: Failed to find next obj offset for obj in object stream though there should be {%zu} more.\n", objstm->n - objstm->nobjs_found);
|
||||
status = CL_EPARSE;
|
||||
goto done;
|
||||
} else if (temp_long < 0) {
|
||||
|
@ -1563,18 +1563,18 @@ cl_error_t pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t
|
|||
}
|
||||
}
|
||||
|
||||
cli_dbgmsg("pdf_extract_obj: calculated length %lld\n", (long long)length);
|
||||
cli_dbgmsg("pdf_extract_obj: calculated length %zu\n", length);
|
||||
} else {
|
||||
if (obj->stream_size > (size_t)length + 2) {
|
||||
cli_dbgmsg("cli_pdf: calculated length %zu < %zu\n",
|
||||
(size_t)length, obj->stream_size);
|
||||
length, obj->stream_size);
|
||||
length = obj->stream_size;
|
||||
}
|
||||
}
|
||||
|
||||
if ((0 != orig_length) && (obj->stream_size > (size_t)orig_length + 20)) {
|
||||
cli_dbgmsg("pdf_extract_obj: orig length: %lld, length: %lld, size: %zu\n",
|
||||
(long long)orig_length, (long long)length, obj->stream_size);
|
||||
if ((0 != orig_length) && (obj->stream_size > orig_length + 20)) {
|
||||
cli_dbgmsg("pdf_extract_obj: orig length: %zu, length: %zu, size: %zu\n",
|
||||
orig_length, length, obj->stream_size);
|
||||
pdfobj_flag(pdf, obj, BAD_STREAMLEN);
|
||||
}
|
||||
|
||||
|
@ -1628,18 +1628,18 @@ cl_error_t pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t
|
|||
*/
|
||||
dict_len = obj->stream - start;
|
||||
if (NULL != (pstr = pdf_getdict(start, &dict_len, "/Type/ObjStm"))) {
|
||||
int32_t objstm_first = -1;
|
||||
int32_t objstm_length = -1;
|
||||
int32_t objstm_n = -1;
|
||||
int objstm_first = -1;
|
||||
int objstm_length = -1;
|
||||
int objstm_n = -1;
|
||||
|
||||
cli_dbgmsg("pdf_extract_obj: Found /Type/ObjStm\n");
|
||||
|
||||
dict_len = obj->stream - start;
|
||||
if ((-1 == (objstm_first = pdf_readint(start, dict_len, "/First")))) {
|
||||
if (-1 == (objstm_first = pdf_readint(start, dict_len, "/First"))) {
|
||||
cli_warnmsg("pdf_extract_obj: Failed to find offset of first object in object stream\n");
|
||||
} else if ((-1 == (objstm_length = pdf_readint(start, dict_len, "/Length")))) {
|
||||
} else if (-1 == (objstm_length = pdf_readint(start, dict_len, "/Length"))) {
|
||||
cli_warnmsg("pdf_extract_obj: Failed to find length of object stream\n");
|
||||
} else if ((-1 == (objstm_n = pdf_readint(start, dict_len, "/N")))) {
|
||||
} else if (-1 == (objstm_n = pdf_readint(start, dict_len, "/N"))) {
|
||||
cli_warnmsg("pdf_extract_obj: Failed to find num objects in object stream\n");
|
||||
} else {
|
||||
/* Add objstm to pdf struct, so it can be freed eventually */
|
||||
|
@ -1661,19 +1661,19 @@ cl_error_t pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t
|
|||
|
||||
memset(objstm, 0, sizeof(*objstm));
|
||||
|
||||
objstm->first = (uint32_t)objstm_first;
|
||||
objstm->current = (uint32_t)objstm_first;
|
||||
objstm->first = (size_t)objstm_first;
|
||||
objstm->current = (size_t)objstm_first;
|
||||
objstm->current_pair = 0;
|
||||
objstm->length = (uint32_t)objstm_length;
|
||||
objstm->n = (uint32_t)objstm_n;
|
||||
objstm->length = (size_t)objstm_length;
|
||||
objstm->n = (size_t)objstm_n;
|
||||
|
||||
cli_dbgmsg("pdf_extract_obj: ObjStm first obj at offset %d\n", objstm->first);
|
||||
cli_dbgmsg("pdf_extract_obj: ObjStm length is %d bytes\n", objstm->length);
|
||||
cli_dbgmsg("pdf_extract_obj: ObjStm should contain %d objects\n", objstm->n);
|
||||
cli_dbgmsg("pdf_extract_obj: ObjStm first obj at offset %zu\n", objstm->first);
|
||||
cli_dbgmsg("pdf_extract_obj: ObjStm length is %zu bytes\n", objstm->length);
|
||||
cli_dbgmsg("pdf_extract_obj: ObjStm should contain %zu objects\n", objstm->n);
|
||||
}
|
||||
}
|
||||
|
||||
sum = pdf_decodestream(pdf, obj, dparams, obj->stream, (uint32_t)length, xref, fout, &rc, objstm);
|
||||
sum = pdf_decodestream(pdf, obj, dparams, obj->stream, length, xref, fout, &rc, objstm);
|
||||
if ((CL_SUCCESS != rc) && (CL_VIRUS != rc)) {
|
||||
cli_dbgmsg("Error decoding stream! Error code: %d\n", rc);
|
||||
|
||||
|
@ -3535,7 +3535,7 @@ cl_error_t pdf_find_and_parse_objs_in_objstm(struct pdf_struct *pdf, struct objs
|
|||
retval = pdf_findobj_in_objstm(pdf, objstm, &obj);
|
||||
if (retval != CL_SUCCESS) {
|
||||
if (retval != CL_BREAK) {
|
||||
cli_dbgmsg("pdf_find_and_parse_objs_in_objstm: Fewer objects in stream than expected: %u found, %u expected.\n",
|
||||
cli_dbgmsg("pdf_find_and_parse_objs_in_objstm: Fewer objects in stream than expected: %zu found, %zu expected.\n",
|
||||
objstm->nobjs_found, objstm->n);
|
||||
badobjects++;
|
||||
pdf->stats.ninvalidobjs++;
|
||||
|
|
|
@ -27,14 +27,14 @@
|
|||
#define PDF_OBJECT_RECURSION_LIMIT 25
|
||||
|
||||
struct objstm_struct {
|
||||
uint32_t first; // offset of first obj
|
||||
uint32_t current; // offset of current obj
|
||||
uint32_t current_pair; // offset of current pair describing id, location of object
|
||||
uint32_t length; // total length of all objects (starting at first)
|
||||
uint32_t n; // number of objects that should be found in the object stream
|
||||
uint32_t nobjs_found; // number of objects actually found in the object stream
|
||||
char *streambuf; // address of stream buffer, beginning with first obj pair
|
||||
size_t streambuf_len; // length of stream buffer, includes pairs followed by actual objects
|
||||
size_t first; // offset of first obj
|
||||
size_t current; // offset of current obj
|
||||
size_t current_pair; // offset of current pair describing id, location of object
|
||||
size_t length; // total length of all objects (starting at first)
|
||||
size_t n; // number of objects that should be found in the object stream
|
||||
size_t nobjs_found; // number of objects actually found in the object stream
|
||||
char *streambuf; // address of stream buffer, beginning with first obj pair
|
||||
size_t streambuf_len; // length of stream buffer, includes pairs followed by actual objects
|
||||
};
|
||||
|
||||
struct pdf_obj {
|
||||
|
|
|
@ -73,7 +73,7 @@
|
|||
struct pdf_token {
|
||||
uint32_t flags; /* tracking flags */
|
||||
uint32_t success; /* successfully decoded filters */
|
||||
uint32_t length; /* length of current content; TODO: transition to size_t */
|
||||
size_t length; /* length of current content */
|
||||
uint8_t *content; /* content stream */
|
||||
};
|
||||
|
||||
|
@ -401,10 +401,16 @@ static cl_error_t filter_ascii85decode(struct pdf_struct *pdf, struct pdf_obj *o
|
|||
uint32_t declen = 0;
|
||||
|
||||
const uint8_t *ptr = (uint8_t *)token->content;
|
||||
uint32_t remaining = token->length;
|
||||
size_t remaining = token->length;
|
||||
int quintet = 0, rc = CL_SUCCESS;
|
||||
uint64_t sum = 0;
|
||||
|
||||
/* Check for overflow */
|
||||
if (remaining > (SIZE_MAX / 4)) {
|
||||
cli_dbgmsg("cli_pdf: ascii85decode: overflow detected\n");
|
||||
return CL_EFORMAT;
|
||||
}
|
||||
|
||||
/* 5:4 decoding ratio, with 1:4 expansion sequences => (4*length)+1 */
|
||||
if (!(dptr = decoded = (uint8_t *)cli_max_malloc((4 * remaining) + 1))) {
|
||||
cli_errmsg("cli_pdf: cannot allocate memory for decoded output\n");
|
||||
|
@ -791,8 +797,8 @@ static cl_error_t filter_asciihexdecode(struct pdf_struct *pdf, struct pdf_obj *
|
|||
uint8_t *decoded;
|
||||
|
||||
const uint8_t *content = (uint8_t *)token->content;
|
||||
uint32_t length = token->length;
|
||||
uint32_t i, j;
|
||||
size_t length = token->length;
|
||||
size_t i, j;
|
||||
cl_error_t rc = CL_SUCCESS;
|
||||
|
||||
if (!(decoded = (uint8_t *)cli_max_calloc(length / 2 + 1, sizeof(uint8_t)))) {
|
||||
|
@ -822,8 +828,8 @@ static cl_error_t filter_asciihexdecode(struct pdf_struct *pdf, struct pdf_obj *
|
|||
if (rc == CL_SUCCESS) {
|
||||
free(token->content);
|
||||
|
||||
cli_dbgmsg("cli_pdf: deflated %lu bytes from %lu total bytes\n",
|
||||
(unsigned long)j, (unsigned long)(token->length));
|
||||
cli_dbgmsg("cli_pdf: deflated %zu bytes from %zu total bytes\n",
|
||||
j, token->length);
|
||||
|
||||
token->content = decoded;
|
||||
token->length = j;
|
||||
|
@ -831,8 +837,8 @@ static cl_error_t filter_asciihexdecode(struct pdf_struct *pdf, struct pdf_obj *
|
|||
if (!(obj->flags & ((1 << OBJ_IMAGE) | (1 << OBJ_TRUNCATED))))
|
||||
pdfobj_flag(pdf, obj, BAD_ASCIIDECODE);
|
||||
|
||||
cli_dbgmsg("cli_pdf: error occurred parsing byte %lu of %lu\n",
|
||||
(unsigned long)i, (unsigned long)(token->length));
|
||||
cli_dbgmsg("cli_pdf: error occurred parsing byte %zu of %zu\n",
|
||||
i, token->length);
|
||||
free(decoded);
|
||||
}
|
||||
return rc;
|
||||
|
@ -873,27 +879,29 @@ static cl_error_t filter_decrypt(struct pdf_struct *pdf, struct pdf_obj *obj, st
|
|||
return CL_EPARSE; /* TODO: what should this value be? CL_SUCCESS would mirror previous behavior */
|
||||
}
|
||||
|
||||
cli_dbgmsg("cli_pdf: decrypted %zu bytes from %u total bytes\n",
|
||||
cli_dbgmsg("cli_pdf: decrypted %zu bytes from %zu total bytes\n",
|
||||
length, token->length);
|
||||
|
||||
free(token->content);
|
||||
token->content = (uint8_t *)decrypted;
|
||||
token->length = (uint32_t)length; /* this may truncate unfortunately, TODO: use 64-bit values internally? */
|
||||
token->length = length;
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token)
|
||||
{
|
||||
uint8_t *decoded, *temp;
|
||||
uint32_t declen = 0, capacity = 0;
|
||||
size_t declen = 0, capacity = 0;
|
||||
|
||||
uint8_t *content = (uint8_t *)token->content;
|
||||
uint32_t length = token->length;
|
||||
lzw_stream stream;
|
||||
int echg = 1, lzwstat, rc = CL_SUCCESS;
|
||||
|
||||
if (pdf->ctx && !(pdf->ctx->dconf->other & OTHER_CONF_LZW))
|
||||
return CL_BREAK;
|
||||
if (pdf->ctx && !(pdf->ctx->dconf->other & OTHER_CONF_LZW)) {
|
||||
rc = CL_BREAK;
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (params) {
|
||||
struct pdf_dict_node *node = params->nodes;
|
||||
|
@ -924,15 +932,18 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj,
|
|||
* Sample 0015315109, it has \r followed by zlib header.
|
||||
* Flag pdf as suspicious, and attempt to extract by skipping the \r.
|
||||
*/
|
||||
if (!length)
|
||||
return CL_SUCCESS;
|
||||
if (!length) {
|
||||
rc = CL_SUCCESS;
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
capacity = INFLATE_CHUNK_SIZE;
|
||||
|
||||
if (!(decoded = (uint8_t *)malloc(capacity))) {
|
||||
cli_errmsg("cli_pdf: cannot allocate memory for decoded output\n");
|
||||
return CL_EMEM;
|
||||
rc = CL_EMEM;
|
||||
goto done;
|
||||
}
|
||||
|
||||
memset(&stream, 0, sizeof(stream));
|
||||
|
@ -947,7 +958,8 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj,
|
|||
if (lzwstat != Z_OK) {
|
||||
cli_warnmsg("cli_pdf: lzwInit failed\n");
|
||||
free(decoded);
|
||||
return CL_EMEM;
|
||||
rc = CL_EMEM;
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* initial inflate */
|
||||
|
@ -962,16 +974,23 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj,
|
|||
length -= q - content;
|
||||
content = q;
|
||||
|
||||
stream.next_in = (Bytef *)content;
|
||||
stream.avail_in = length;
|
||||
stream.next_out = (Bytef *)decoded;
|
||||
stream.next_in = (Bytef *)content;
|
||||
stream.avail_in = length;
|
||||
stream.next_out = (Bytef *)decoded;
|
||||
/* Make sure we don't overflow during type conversion */
|
||||
if (capacity > UINT_MAX) {
|
||||
cli_dbgmsg("cli_pdf: lzwdecode: overflow detected\n");
|
||||
rc = CL_EFORMAT;
|
||||
goto done;
|
||||
}
|
||||
stream.avail_out = capacity;
|
||||
|
||||
lzwstat = lzwInit(&stream);
|
||||
if (lzwstat != Z_OK) {
|
||||
cli_warnmsg("cli_pdf: lzwInit failed\n");
|
||||
free(decoded);
|
||||
return CL_EMEM;
|
||||
rc = CL_EMEM;
|
||||
goto done;
|
||||
}
|
||||
|
||||
pdfobj_flag(pdf, obj, BAD_FLATESTART);
|
||||
|
@ -984,7 +1003,7 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj,
|
|||
/* extend output capacity if needed,*/
|
||||
if (stream.avail_out == 0) {
|
||||
if ((rc = cli_checklimits("pdf", pdf->ctx, capacity + INFLATE_CHUNK_SIZE, 0, 0)) != CL_SUCCESS) {
|
||||
cli_dbgmsg("cli_pdf: required buffer size to inflate compressed filter exceeds maximum: %u\n", capacity + INFLATE_CHUNK_SIZE);
|
||||
cli_dbgmsg("cli_pdf: required buffer size to inflate compressed filter exceeds maximum: %zu\n", capacity + INFLATE_CHUNK_SIZE);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -996,7 +1015,17 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj,
|
|||
decoded = temp;
|
||||
stream.next_out = decoded + capacity;
|
||||
stream.avail_out = INFLATE_CHUNK_SIZE;
|
||||
if (declen > (SIZE_MAX - INFLATE_CHUNK_SIZE)) {
|
||||
cli_dbgmsg("cli_pdf: lzwdecode: overflow detected\n");
|
||||
rc = CL_EFORMAT;
|
||||
goto done;
|
||||
}
|
||||
declen += INFLATE_CHUNK_SIZE;
|
||||
if (capacity > (SIZE_MAX - INFLATE_CHUNK_SIZE)) {
|
||||
cli_dbgmsg("cli_pdf: lzwdecode: overflow detected\n");
|
||||
rc = CL_EFORMAT;
|
||||
goto done;
|
||||
}
|
||||
capacity += INFLATE_CHUNK_SIZE;
|
||||
}
|
||||
|
||||
|
@ -1004,6 +1033,12 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj,
|
|||
lzwstat = lzwInflate(&stream);
|
||||
}
|
||||
|
||||
if (declen > (UINT32_MAX - (INFLATE_CHUNK_SIZE - stream.avail_out))) {
|
||||
cli_dbgmsg("cli_pdf: lzwdecode: overflow detected\n");
|
||||
rc = CL_EFORMAT;
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* add stream end fragment to decoded length */
|
||||
declen += (INFLATE_CHUNK_SIZE - stream.avail_out);
|
||||
|
||||
|
@ -1044,6 +1079,7 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj,
|
|||
|
||||
(void)lzwInflateEnd(&stream);
|
||||
|
||||
done:
|
||||
if (rc == CL_SUCCESS) {
|
||||
if (declen == 0) {
|
||||
cli_dbgmsg("cli_pdf: empty stream after inflation completed.\n");
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue