Merge pull request #27 from cisco-sbg/CLAM-2752-pdf-overflow-1.4

Fix integer overflow in PDF parser (1.4.3)
This commit is contained in:
Val S. 2025-05-22 18:33:07 -04:00 committed by GitHub
commit bca003b028
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 87 additions and 51 deletions

View file

@ -440,7 +440,7 @@ int pdf_findobj_in_objstm(struct pdf_struct *pdf, struct objstm_struct *objstm,
if (CL_SUCCESS != cli_strntol_wrap(index, bytes_remaining, 0, 10, &temp_long)) {
/* Failed to find obj offset for next obj */
cli_dbgmsg("pdf_findobj_in_objstm: Failed to find next obj offset for obj in object stream though there should be {%u} more.\n", objstm->n - objstm->nobjs_found);
cli_dbgmsg("pdf_findobj_in_objstm: Failed to find next obj offset for obj in object stream though there should be {%zu} more.\n", objstm->n - objstm->nobjs_found);
status = CL_EPARSE;
goto done;
} else if (temp_long < 0) {
@ -1563,18 +1563,18 @@ cl_error_t pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t
}
}
cli_dbgmsg("pdf_extract_obj: calculated length %lld\n", (long long)length);
cli_dbgmsg("pdf_extract_obj: calculated length %zu\n", length);
} else {
if (obj->stream_size > (size_t)length + 2) {
cli_dbgmsg("cli_pdf: calculated length %zu < %zu\n",
(size_t)length, obj->stream_size);
length, obj->stream_size);
length = obj->stream_size;
}
}
if ((0 != orig_length) && (obj->stream_size > (size_t)orig_length + 20)) {
cli_dbgmsg("pdf_extract_obj: orig length: %lld, length: %lld, size: %zu\n",
(long long)orig_length, (long long)length, obj->stream_size);
if ((0 != orig_length) && (obj->stream_size > orig_length + 20)) {
cli_dbgmsg("pdf_extract_obj: orig length: %zu, length: %zu, size: %zu\n",
orig_length, length, obj->stream_size);
pdfobj_flag(pdf, obj, BAD_STREAMLEN);
}
@ -1628,18 +1628,18 @@ cl_error_t pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t
*/
dict_len = obj->stream - start;
if (NULL != (pstr = pdf_getdict(start, &dict_len, "/Type/ObjStm"))) {
int32_t objstm_first = -1;
int32_t objstm_length = -1;
int32_t objstm_n = -1;
int objstm_first = -1;
int objstm_length = -1;
int objstm_n = -1;
cli_dbgmsg("pdf_extract_obj: Found /Type/ObjStm\n");
dict_len = obj->stream - start;
if ((-1 == (objstm_first = pdf_readint(start, dict_len, "/First")))) {
if (-1 == (objstm_first = pdf_readint(start, dict_len, "/First"))) {
cli_warnmsg("pdf_extract_obj: Failed to find offset of first object in object stream\n");
} else if ((-1 == (objstm_length = pdf_readint(start, dict_len, "/Length")))) {
} else if (-1 == (objstm_length = pdf_readint(start, dict_len, "/Length"))) {
cli_warnmsg("pdf_extract_obj: Failed to find length of object stream\n");
} else if ((-1 == (objstm_n = pdf_readint(start, dict_len, "/N")))) {
} else if (-1 == (objstm_n = pdf_readint(start, dict_len, "/N"))) {
cli_warnmsg("pdf_extract_obj: Failed to find num objects in object stream\n");
} else {
/* Add objstm to pdf struct, so it can be freed eventually */
@ -1661,19 +1661,19 @@ cl_error_t pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t
memset(objstm, 0, sizeof(*objstm));
objstm->first = (uint32_t)objstm_first;
objstm->current = (uint32_t)objstm_first;
objstm->first = (size_t)objstm_first;
objstm->current = (size_t)objstm_first;
objstm->current_pair = 0;
objstm->length = (uint32_t)objstm_length;
objstm->n = (uint32_t)objstm_n;
objstm->length = (size_t)objstm_length;
objstm->n = (size_t)objstm_n;
cli_dbgmsg("pdf_extract_obj: ObjStm first obj at offset %d\n", objstm->first);
cli_dbgmsg("pdf_extract_obj: ObjStm length is %d bytes\n", objstm->length);
cli_dbgmsg("pdf_extract_obj: ObjStm should contain %d objects\n", objstm->n);
cli_dbgmsg("pdf_extract_obj: ObjStm first obj at offset %zu\n", objstm->first);
cli_dbgmsg("pdf_extract_obj: ObjStm length is %zu bytes\n", objstm->length);
cli_dbgmsg("pdf_extract_obj: ObjStm should contain %zu objects\n", objstm->n);
}
}
sum = pdf_decodestream(pdf, obj, dparams, obj->stream, (uint32_t)length, xref, fout, &rc, objstm);
sum = pdf_decodestream(pdf, obj, dparams, obj->stream, length, xref, fout, &rc, objstm);
if ((CL_SUCCESS != rc) && (CL_VIRUS != rc)) {
cli_dbgmsg("Error decoding stream! Error code: %d\n", rc);
@ -3535,7 +3535,7 @@ cl_error_t pdf_find_and_parse_objs_in_objstm(struct pdf_struct *pdf, struct objs
retval = pdf_findobj_in_objstm(pdf, objstm, &obj);
if (retval != CL_SUCCESS) {
if (retval != CL_BREAK) {
cli_dbgmsg("pdf_find_and_parse_objs_in_objstm: Fewer objects in stream than expected: %u found, %u expected.\n",
cli_dbgmsg("pdf_find_and_parse_objs_in_objstm: Fewer objects in stream than expected: %zu found, %zu expected.\n",
objstm->nobjs_found, objstm->n);
badobjects++;
pdf->stats.ninvalidobjs++;

View file

@ -27,14 +27,14 @@
#define PDF_OBJECT_RECURSION_LIMIT 25
/*
 * Bookkeeping for one PDF object stream (/Type/ObjStm) while the objects
 * embedded in it are being located. first, length and n are populated from
 * the stream dictionary's /First, /Length and /N entries.
 */
struct objstm_struct {
uint32_t first; // offset of first obj
uint32_t current; // offset of current obj
uint32_t current_pair; // offset of current pair describing id, location of object
uint32_t length; // total length of all objects (starting at first)
uint32_t n; // number of objects that should be found in the object stream
uint32_t nobjs_found; // number of objects actually found in the object stream
char *streambuf; // address of stream buffer, beginning with first obj pair
size_t streambuf_len; // length of stream buffer, includes pairs followed by actual objects
size_t first; // offset of first obj
size_t current; // offset of current obj
size_t current_pair; // offset of current pair describing id, location of object
size_t length; // total length of all objects (starting at first)
size_t n; // number of objects that should be found in the object stream
size_t nobjs_found; // number of objects actually found in the object stream
char *streambuf; // address of stream buffer, beginning with first obj pair
size_t streambuf_len; // length of stream buffer, includes pairs followed by actual objects
};
struct pdf_obj {

View file

@ -73,7 +73,7 @@
/*
 * Working buffer passed to each stream filter; a filter that succeeds frees
 * the old content and stores its decoded output and new length here.
 */
struct pdf_token {
uint32_t flags; /* tracking flags */
uint32_t success; /* successfully decoded filters */
uint32_t length; /* length of current content; TODO: transition to size_t */
size_t length; /* length of current content */
uint8_t *content; /* content stream */
};
@ -401,10 +401,16 @@ static cl_error_t filter_ascii85decode(struct pdf_struct *pdf, struct pdf_obj *o
uint32_t declen = 0;
const uint8_t *ptr = (uint8_t *)token->content;
uint32_t remaining = token->length;
size_t remaining = token->length;
int quintet = 0, rc = CL_SUCCESS;
uint64_t sum = 0;
/* Check for overflow */
if (remaining > (SIZE_MAX / 4)) {
cli_dbgmsg("cli_pdf: ascii85decode: overflow detected\n");
return CL_EFORMAT;
}
/* 5:4 decoding ratio, with 1:4 expansion sequences => (4*length)+1 */
if (!(dptr = decoded = (uint8_t *)cli_max_malloc((4 * remaining) + 1))) {
cli_errmsg("cli_pdf: cannot allocate memory for decoded output\n");
@ -791,8 +797,8 @@ static cl_error_t filter_asciihexdecode(struct pdf_struct *pdf, struct pdf_obj *
uint8_t *decoded;
const uint8_t *content = (uint8_t *)token->content;
uint32_t length = token->length;
uint32_t i, j;
size_t length = token->length;
size_t i, j;
cl_error_t rc = CL_SUCCESS;
if (!(decoded = (uint8_t *)cli_max_calloc(length / 2 + 1, sizeof(uint8_t)))) {
@ -822,8 +828,8 @@ static cl_error_t filter_asciihexdecode(struct pdf_struct *pdf, struct pdf_obj *
if (rc == CL_SUCCESS) {
free(token->content);
cli_dbgmsg("cli_pdf: deflated %lu bytes from %lu total bytes\n",
(unsigned long)j, (unsigned long)(token->length));
cli_dbgmsg("cli_pdf: deflated %zu bytes from %zu total bytes\n",
j, token->length);
token->content = decoded;
token->length = j;
@ -831,8 +837,8 @@ static cl_error_t filter_asciihexdecode(struct pdf_struct *pdf, struct pdf_obj *
if (!(obj->flags & ((1 << OBJ_IMAGE) | (1 << OBJ_TRUNCATED))))
pdfobj_flag(pdf, obj, BAD_ASCIIDECODE);
cli_dbgmsg("cli_pdf: error occurred parsing byte %lu of %lu\n",
(unsigned long)i, (unsigned long)(token->length));
cli_dbgmsg("cli_pdf: error occurred parsing byte %zu of %zu\n",
i, token->length);
free(decoded);
}
return rc;
@ -873,27 +879,29 @@ static cl_error_t filter_decrypt(struct pdf_struct *pdf, struct pdf_obj *obj, st
return CL_EPARSE; /* TODO: what should this value be? CL_SUCCESS would mirror previous behavior */
}
cli_dbgmsg("cli_pdf: decrypted %zu bytes from %u total bytes\n",
cli_dbgmsg("cli_pdf: decrypted %zu bytes from %zu total bytes\n",
length, token->length);
free(token->content);
token->content = (uint8_t *)decrypted;
token->length = (uint32_t)length; /* this may truncate unfortunately, TODO: use 64-bit values internally? */
token->length = length;
return CL_SUCCESS;
}
static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token)
{
uint8_t *decoded, *temp;
uint32_t declen = 0, capacity = 0;
size_t declen = 0, capacity = 0;
uint8_t *content = (uint8_t *)token->content;
uint32_t length = token->length;
lzw_stream stream;
int echg = 1, lzwstat, rc = CL_SUCCESS;
if (pdf->ctx && !(pdf->ctx->dconf->other & OTHER_CONF_LZW))
return CL_BREAK;
if (pdf->ctx && !(pdf->ctx->dconf->other & OTHER_CONF_LZW)) {
rc = CL_BREAK;
goto done;
}
if (params) {
struct pdf_dict_node *node = params->nodes;
@ -924,15 +932,18 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj,
* Sample 0015315109, it has \r followed by zlib header.
* Flag pdf as suspicious, and attempt to extract by skipping the \r.
*/
if (!length)
return CL_SUCCESS;
if (!length) {
rc = CL_SUCCESS;
goto done;
}
}
capacity = INFLATE_CHUNK_SIZE;
if (!(decoded = (uint8_t *)malloc(capacity))) {
cli_errmsg("cli_pdf: cannot allocate memory for decoded output\n");
return CL_EMEM;
rc = CL_EMEM;
goto done;
}
memset(&stream, 0, sizeof(stream));
@ -947,7 +958,8 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj,
if (lzwstat != Z_OK) {
cli_warnmsg("cli_pdf: lzwInit failed\n");
free(decoded);
return CL_EMEM;
rc = CL_EMEM;
goto done;
}
/* initial inflate */
@ -962,16 +974,23 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj,
length -= q - content;
content = q;
stream.next_in = (Bytef *)content;
stream.avail_in = length;
stream.next_out = (Bytef *)decoded;
stream.next_in = (Bytef *)content;
stream.avail_in = length;
stream.next_out = (Bytef *)decoded;
/* Make sure we don't overflow during type conversion */
if (capacity > UINT_MAX) {
cli_dbgmsg("cli_pdf: lzwdecode: overflow detected\n");
rc = CL_EFORMAT;
goto done;
}
stream.avail_out = capacity;
lzwstat = lzwInit(&stream);
if (lzwstat != Z_OK) {
cli_warnmsg("cli_pdf: lzwInit failed\n");
free(decoded);
return CL_EMEM;
rc = CL_EMEM;
goto done;
}
pdfobj_flag(pdf, obj, BAD_FLATESTART);
@ -984,7 +1003,7 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj,
/* extend output capacity if needed,*/
if (stream.avail_out == 0) {
if ((rc = cli_checklimits("pdf", pdf->ctx, capacity + INFLATE_CHUNK_SIZE, 0, 0)) != CL_SUCCESS) {
cli_dbgmsg("cli_pdf: required buffer size to inflate compressed filter exceeds maximum: %u\n", capacity + INFLATE_CHUNK_SIZE);
cli_dbgmsg("cli_pdf: required buffer size to inflate compressed filter exceeds maximum: %zu\n", capacity + INFLATE_CHUNK_SIZE);
break;
}
@ -996,7 +1015,17 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj,
decoded = temp;
stream.next_out = decoded + capacity;
stream.avail_out = INFLATE_CHUNK_SIZE;
if (declen > (SIZE_MAX - INFLATE_CHUNK_SIZE)) {
cli_dbgmsg("cli_pdf: lzwdecode: overflow detected\n");
rc = CL_EFORMAT;
goto done;
}
declen += INFLATE_CHUNK_SIZE;
if (capacity > (SIZE_MAX - INFLATE_CHUNK_SIZE)) {
cli_dbgmsg("cli_pdf: lzwdecode: overflow detected\n");
rc = CL_EFORMAT;
goto done;
}
capacity += INFLATE_CHUNK_SIZE;
}
@ -1004,6 +1033,12 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj,
lzwstat = lzwInflate(&stream);
}
if (declen > (UINT32_MAX - (INFLATE_CHUNK_SIZE - stream.avail_out))) {
cli_dbgmsg("cli_pdf: lzwdecode: overflow detected\n");
rc = CL_EFORMAT;
goto done;
}
/* add stream end fragment to decoded length */
declen += (INFLATE_CHUNK_SIZE - stream.avail_out);
@ -1044,6 +1079,7 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj,
(void)lzwInflateEnd(&stream);
done:
if (rc == CL_SUCCESS) {
if (declen == 0) {
cli_dbgmsg("cli_pdf: empty stream after inflation completed.\n");