Merge pull request #27 from cisco-sbg/CLAM-2752-pdf-overflow-1.4

Fix integer overflow in PDF parser (1.4.3)
This commit is contained in:
Val S. 2025-05-22 18:33:07 -04:00 committed by GitHub
commit bca003b028
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 87 additions and 51 deletions

View file

@ -440,7 +440,7 @@ int pdf_findobj_in_objstm(struct pdf_struct *pdf, struct objstm_struct *objstm,
if (CL_SUCCESS != cli_strntol_wrap(index, bytes_remaining, 0, 10, &temp_long)) { if (CL_SUCCESS != cli_strntol_wrap(index, bytes_remaining, 0, 10, &temp_long)) {
/* Failed to find obj offset for next obj */ /* Failed to find obj offset for next obj */
cli_dbgmsg("pdf_findobj_in_objstm: Failed to find next obj offset for obj in object stream though there should be {%u} more.\n", objstm->n - objstm->nobjs_found); cli_dbgmsg("pdf_findobj_in_objstm: Failed to find next obj offset for obj in object stream though there should be {%zu} more.\n", objstm->n - objstm->nobjs_found);
status = CL_EPARSE; status = CL_EPARSE;
goto done; goto done;
} else if (temp_long < 0) { } else if (temp_long < 0) {
@ -1563,18 +1563,18 @@ cl_error_t pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t
} }
} }
cli_dbgmsg("pdf_extract_obj: calculated length %lld\n", (long long)length); cli_dbgmsg("pdf_extract_obj: calculated length %zu\n", length);
} else { } else {
if (obj->stream_size > (size_t)length + 2) { if (obj->stream_size > (size_t)length + 2) {
cli_dbgmsg("cli_pdf: calculated length %zu < %zu\n", cli_dbgmsg("cli_pdf: calculated length %zu < %zu\n",
(size_t)length, obj->stream_size); length, obj->stream_size);
length = obj->stream_size; length = obj->stream_size;
} }
} }
if ((0 != orig_length) && (obj->stream_size > (size_t)orig_length + 20)) { if ((0 != orig_length) && (obj->stream_size > orig_length + 20)) {
cli_dbgmsg("pdf_extract_obj: orig length: %lld, length: %lld, size: %zu\n", cli_dbgmsg("pdf_extract_obj: orig length: %zu, length: %zu, size: %zu\n",
(long long)orig_length, (long long)length, obj->stream_size); orig_length, length, obj->stream_size);
pdfobj_flag(pdf, obj, BAD_STREAMLEN); pdfobj_flag(pdf, obj, BAD_STREAMLEN);
} }
@ -1628,18 +1628,18 @@ cl_error_t pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t
*/ */
dict_len = obj->stream - start; dict_len = obj->stream - start;
if (NULL != (pstr = pdf_getdict(start, &dict_len, "/Type/ObjStm"))) { if (NULL != (pstr = pdf_getdict(start, &dict_len, "/Type/ObjStm"))) {
int32_t objstm_first = -1; int objstm_first = -1;
int32_t objstm_length = -1; int objstm_length = -1;
int32_t objstm_n = -1; int objstm_n = -1;
cli_dbgmsg("pdf_extract_obj: Found /Type/ObjStm\n"); cli_dbgmsg("pdf_extract_obj: Found /Type/ObjStm\n");
dict_len = obj->stream - start; dict_len = obj->stream - start;
if ((-1 == (objstm_first = pdf_readint(start, dict_len, "/First")))) { if (-1 == (objstm_first = pdf_readint(start, dict_len, "/First"))) {
cli_warnmsg("pdf_extract_obj: Failed to find offset of first object in object stream\n"); cli_warnmsg("pdf_extract_obj: Failed to find offset of first object in object stream\n");
} else if ((-1 == (objstm_length = pdf_readint(start, dict_len, "/Length")))) { } else if (-1 == (objstm_length = pdf_readint(start, dict_len, "/Length"))) {
cli_warnmsg("pdf_extract_obj: Failed to find length of object stream\n"); cli_warnmsg("pdf_extract_obj: Failed to find length of object stream\n");
} else if ((-1 == (objstm_n = pdf_readint(start, dict_len, "/N")))) { } else if (-1 == (objstm_n = pdf_readint(start, dict_len, "/N"))) {
cli_warnmsg("pdf_extract_obj: Failed to find num objects in object stream\n"); cli_warnmsg("pdf_extract_obj: Failed to find num objects in object stream\n");
} else { } else {
/* Add objstm to pdf struct, so it can be freed eventually */ /* Add objstm to pdf struct, so it can be freed eventually */
@ -1661,19 +1661,19 @@ cl_error_t pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t
memset(objstm, 0, sizeof(*objstm)); memset(objstm, 0, sizeof(*objstm));
objstm->first = (uint32_t)objstm_first; objstm->first = (size_t)objstm_first;
objstm->current = (uint32_t)objstm_first; objstm->current = (size_t)objstm_first;
objstm->current_pair = 0; objstm->current_pair = 0;
objstm->length = (uint32_t)objstm_length; objstm->length = (size_t)objstm_length;
objstm->n = (uint32_t)objstm_n; objstm->n = (size_t)objstm_n;
cli_dbgmsg("pdf_extract_obj: ObjStm first obj at offset %d\n", objstm->first); cli_dbgmsg("pdf_extract_obj: ObjStm first obj at offset %zu\n", objstm->first);
cli_dbgmsg("pdf_extract_obj: ObjStm length is %d bytes\n", objstm->length); cli_dbgmsg("pdf_extract_obj: ObjStm length is %zu bytes\n", objstm->length);
cli_dbgmsg("pdf_extract_obj: ObjStm should contain %d objects\n", objstm->n); cli_dbgmsg("pdf_extract_obj: ObjStm should contain %zu objects\n", objstm->n);
} }
} }
sum = pdf_decodestream(pdf, obj, dparams, obj->stream, (uint32_t)length, xref, fout, &rc, objstm); sum = pdf_decodestream(pdf, obj, dparams, obj->stream, length, xref, fout, &rc, objstm);
if ((CL_SUCCESS != rc) && (CL_VIRUS != rc)) { if ((CL_SUCCESS != rc) && (CL_VIRUS != rc)) {
cli_dbgmsg("Error decoding stream! Error code: %d\n", rc); cli_dbgmsg("Error decoding stream! Error code: %d\n", rc);
@ -3535,7 +3535,7 @@ cl_error_t pdf_find_and_parse_objs_in_objstm(struct pdf_struct *pdf, struct objs
retval = pdf_findobj_in_objstm(pdf, objstm, &obj); retval = pdf_findobj_in_objstm(pdf, objstm, &obj);
if (retval != CL_SUCCESS) { if (retval != CL_SUCCESS) {
if (retval != CL_BREAK) { if (retval != CL_BREAK) {
cli_dbgmsg("pdf_find_and_parse_objs_in_objstm: Fewer objects in stream than expected: %u found, %u expected.\n", cli_dbgmsg("pdf_find_and_parse_objs_in_objstm: Fewer objects in stream than expected: %zu found, %zu expected.\n",
objstm->nobjs_found, objstm->n); objstm->nobjs_found, objstm->n);
badobjects++; badobjects++;
pdf->stats.ninvalidobjs++; pdf->stats.ninvalidobjs++;

View file

@ -27,14 +27,14 @@
#define PDF_OBJECT_RECURSION_LIMIT 25 #define PDF_OBJECT_RECURSION_LIMIT 25
struct objstm_struct { struct objstm_struct {
uint32_t first; // offset of first obj size_t first; // offset of first obj
uint32_t current; // offset of current obj size_t current; // offset of current obj
uint32_t current_pair; // offset of current pair describing id, location of object size_t current_pair; // offset of current pair describing id, location of object
uint32_t length; // total length of all objects (starting at first) size_t length; // total length of all objects (starting at first)
uint32_t n; // number of objects that should be found in the object stream size_t n; // number of objects that should be found in the object stream
uint32_t nobjs_found; // number of objects actually found in the object stream size_t nobjs_found; // number of objects actually found in the object stream
char *streambuf; // address of stream buffer, beginning with first obj pair char *streambuf; // address of stream buffer, beginning with first obj pair
size_t streambuf_len; // length of stream buffer, includes pairs followed by actual objects size_t streambuf_len; // length of stream buffer, includes pairs followed by actual objects
}; };
struct pdf_obj { struct pdf_obj {

View file

@ -73,7 +73,7 @@
struct pdf_token { struct pdf_token {
uint32_t flags; /* tracking flags */ uint32_t flags; /* tracking flags */
uint32_t success; /* successfully decoded filters */ uint32_t success; /* successfully decoded filters */
uint32_t length; /* length of current content; TODO: transition to size_t */ size_t length; /* length of current content */
uint8_t *content; /* content stream */ uint8_t *content; /* content stream */
}; };
@ -401,10 +401,16 @@ static cl_error_t filter_ascii85decode(struct pdf_struct *pdf, struct pdf_obj *o
uint32_t declen = 0; uint32_t declen = 0;
const uint8_t *ptr = (uint8_t *)token->content; const uint8_t *ptr = (uint8_t *)token->content;
uint32_t remaining = token->length; size_t remaining = token->length;
int quintet = 0, rc = CL_SUCCESS; int quintet = 0, rc = CL_SUCCESS;
uint64_t sum = 0; uint64_t sum = 0;
/* Check for overflow */
if (remaining > (SIZE_MAX / 4)) {
cli_dbgmsg("cli_pdf: ascii85decode: overflow detected\n");
return CL_EFORMAT;
}
/* 5:4 decoding ratio, with 1:4 expansion sequences => (4*length)+1 */ /* 5:4 decoding ratio, with 1:4 expansion sequences => (4*length)+1 */
if (!(dptr = decoded = (uint8_t *)cli_max_malloc((4 * remaining) + 1))) { if (!(dptr = decoded = (uint8_t *)cli_max_malloc((4 * remaining) + 1))) {
cli_errmsg("cli_pdf: cannot allocate memory for decoded output\n"); cli_errmsg("cli_pdf: cannot allocate memory for decoded output\n");
@ -791,8 +797,8 @@ static cl_error_t filter_asciihexdecode(struct pdf_struct *pdf, struct pdf_obj *
uint8_t *decoded; uint8_t *decoded;
const uint8_t *content = (uint8_t *)token->content; const uint8_t *content = (uint8_t *)token->content;
uint32_t length = token->length; size_t length = token->length;
uint32_t i, j; size_t i, j;
cl_error_t rc = CL_SUCCESS; cl_error_t rc = CL_SUCCESS;
if (!(decoded = (uint8_t *)cli_max_calloc(length / 2 + 1, sizeof(uint8_t)))) { if (!(decoded = (uint8_t *)cli_max_calloc(length / 2 + 1, sizeof(uint8_t)))) {
@ -822,8 +828,8 @@ static cl_error_t filter_asciihexdecode(struct pdf_struct *pdf, struct pdf_obj *
if (rc == CL_SUCCESS) { if (rc == CL_SUCCESS) {
free(token->content); free(token->content);
cli_dbgmsg("cli_pdf: deflated %lu bytes from %lu total bytes\n", cli_dbgmsg("cli_pdf: deflated %zu bytes from %zu total bytes\n",
(unsigned long)j, (unsigned long)(token->length)); j, token->length);
token->content = decoded; token->content = decoded;
token->length = j; token->length = j;
@ -831,8 +837,8 @@ static cl_error_t filter_asciihexdecode(struct pdf_struct *pdf, struct pdf_obj *
if (!(obj->flags & ((1 << OBJ_IMAGE) | (1 << OBJ_TRUNCATED)))) if (!(obj->flags & ((1 << OBJ_IMAGE) | (1 << OBJ_TRUNCATED))))
pdfobj_flag(pdf, obj, BAD_ASCIIDECODE); pdfobj_flag(pdf, obj, BAD_ASCIIDECODE);
cli_dbgmsg("cli_pdf: error occurred parsing byte %lu of %lu\n", cli_dbgmsg("cli_pdf: error occurred parsing byte %zu of %zu\n",
(unsigned long)i, (unsigned long)(token->length)); i, token->length);
free(decoded); free(decoded);
} }
return rc; return rc;
@ -873,27 +879,29 @@ static cl_error_t filter_decrypt(struct pdf_struct *pdf, struct pdf_obj *obj, st
return CL_EPARSE; /* TODO: what should this value be? CL_SUCCESS would mirror previous behavior */ return CL_EPARSE; /* TODO: what should this value be? CL_SUCCESS would mirror previous behavior */
} }
cli_dbgmsg("cli_pdf: decrypted %zu bytes from %u total bytes\n", cli_dbgmsg("cli_pdf: decrypted %zu bytes from %zu total bytes\n",
length, token->length); length, token->length);
free(token->content); free(token->content);
token->content = (uint8_t *)decrypted; token->content = (uint8_t *)decrypted;
token->length = (uint32_t)length; /* this may truncate unfortunately, TODO: use 64-bit values internally? */ token->length = length;
return CL_SUCCESS; return CL_SUCCESS;
} }
static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token) static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdf_dict *params, struct pdf_token *token)
{ {
uint8_t *decoded, *temp; uint8_t *decoded, *temp;
uint32_t declen = 0, capacity = 0; size_t declen = 0, capacity = 0;
uint8_t *content = (uint8_t *)token->content; uint8_t *content = (uint8_t *)token->content;
uint32_t length = token->length; uint32_t length = token->length;
lzw_stream stream; lzw_stream stream;
int echg = 1, lzwstat, rc = CL_SUCCESS; int echg = 1, lzwstat, rc = CL_SUCCESS;
if (pdf->ctx && !(pdf->ctx->dconf->other & OTHER_CONF_LZW)) if (pdf->ctx && !(pdf->ctx->dconf->other & OTHER_CONF_LZW)) {
return CL_BREAK; rc = CL_BREAK;
goto done;
}
if (params) { if (params) {
struct pdf_dict_node *node = params->nodes; struct pdf_dict_node *node = params->nodes;
@ -924,15 +932,18 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj,
* Sample 0015315109, it has \r followed by zlib header. * Sample 0015315109, it has \r followed by zlib header.
* Flag pdf as suspicious, and attempt to extract by skipping the \r. * Flag pdf as suspicious, and attempt to extract by skipping the \r.
*/ */
if (!length) if (!length) {
return CL_SUCCESS; rc = CL_SUCCESS;
goto done;
}
} }
capacity = INFLATE_CHUNK_SIZE; capacity = INFLATE_CHUNK_SIZE;
if (!(decoded = (uint8_t *)malloc(capacity))) { if (!(decoded = (uint8_t *)malloc(capacity))) {
cli_errmsg("cli_pdf: cannot allocate memory for decoded output\n"); cli_errmsg("cli_pdf: cannot allocate memory for decoded output\n");
return CL_EMEM; rc = CL_EMEM;
goto done;
} }
memset(&stream, 0, sizeof(stream)); memset(&stream, 0, sizeof(stream));
@ -947,7 +958,8 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj,
if (lzwstat != Z_OK) { if (lzwstat != Z_OK) {
cli_warnmsg("cli_pdf: lzwInit failed\n"); cli_warnmsg("cli_pdf: lzwInit failed\n");
free(decoded); free(decoded);
return CL_EMEM; rc = CL_EMEM;
goto done;
} }
/* initial inflate */ /* initial inflate */
@ -962,16 +974,23 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj,
length -= q - content; length -= q - content;
content = q; content = q;
stream.next_in = (Bytef *)content; stream.next_in = (Bytef *)content;
stream.avail_in = length; stream.avail_in = length;
stream.next_out = (Bytef *)decoded; stream.next_out = (Bytef *)decoded;
/* Make sure we don't overflow during type conversion */
if (capacity > UINT_MAX) {
cli_dbgmsg("cli_pdf: lzwdecode: overflow detected\n");
rc = CL_EFORMAT;
goto done;
}
stream.avail_out = capacity; stream.avail_out = capacity;
lzwstat = lzwInit(&stream); lzwstat = lzwInit(&stream);
if (lzwstat != Z_OK) { if (lzwstat != Z_OK) {
cli_warnmsg("cli_pdf: lzwInit failed\n"); cli_warnmsg("cli_pdf: lzwInit failed\n");
free(decoded); free(decoded);
return CL_EMEM; rc = CL_EMEM;
goto done;
} }
pdfobj_flag(pdf, obj, BAD_FLATESTART); pdfobj_flag(pdf, obj, BAD_FLATESTART);
@ -984,7 +1003,7 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj,
/* extend output capacity if needed */ /* extend output capacity if needed */
if (stream.avail_out == 0) { if (stream.avail_out == 0) {
if ((rc = cli_checklimits("pdf", pdf->ctx, capacity + INFLATE_CHUNK_SIZE, 0, 0)) != CL_SUCCESS) { if ((rc = cli_checklimits("pdf", pdf->ctx, capacity + INFLATE_CHUNK_SIZE, 0, 0)) != CL_SUCCESS) {
cli_dbgmsg("cli_pdf: required buffer size to inflate compressed filter exceeds maximum: %u\n", capacity + INFLATE_CHUNK_SIZE); cli_dbgmsg("cli_pdf: required buffer size to inflate compressed filter exceeds maximum: %zu\n", capacity + INFLATE_CHUNK_SIZE);
break; break;
} }
@ -996,7 +1015,17 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj,
decoded = temp; decoded = temp;
stream.next_out = decoded + capacity; stream.next_out = decoded + capacity;
stream.avail_out = INFLATE_CHUNK_SIZE; stream.avail_out = INFLATE_CHUNK_SIZE;
if (declen > (SIZE_MAX - INFLATE_CHUNK_SIZE)) {
cli_dbgmsg("cli_pdf: lzwdecode: overflow detected\n");
rc = CL_EFORMAT;
goto done;
}
declen += INFLATE_CHUNK_SIZE; declen += INFLATE_CHUNK_SIZE;
if (capacity > (SIZE_MAX - INFLATE_CHUNK_SIZE)) {
cli_dbgmsg("cli_pdf: lzwdecode: overflow detected\n");
rc = CL_EFORMAT;
goto done;
}
capacity += INFLATE_CHUNK_SIZE; capacity += INFLATE_CHUNK_SIZE;
} }
@ -1004,6 +1033,12 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj,
lzwstat = lzwInflate(&stream); lzwstat = lzwInflate(&stream);
} }
if (declen > (UINT32_MAX - (INFLATE_CHUNK_SIZE - stream.avail_out))) {
cli_dbgmsg("cli_pdf: lzwdecode: overflow detected\n");
rc = CL_EFORMAT;
goto done;
}
/* add stream end fragment to decoded length */ /* add stream end fragment to decoded length */
declen += (INFLATE_CHUNK_SIZE - stream.avail_out); declen += (INFLATE_CHUNK_SIZE - stream.avail_out);
@ -1044,6 +1079,7 @@ static cl_error_t filter_lzwdecode(struct pdf_struct *pdf, struct pdf_obj *obj,
(void)lzwInflateEnd(&stream); (void)lzwInflateEnd(&stream);
done:
if (rc == CL_SUCCESS) { if (rc == CL_SUCCESS) {
if (declen == 0) { if (declen == 0) {
cli_dbgmsg("cli_pdf: empty stream after inflation completed.\n"); cli_dbgmsg("cli_pdf: empty stream after inflation completed.\n");