2021-04-30 18:33:13 -07:00
|
|
|
/*
|
2022-03-04 19:53:38 -07:00
|
|
|
* Copyright (c) 2021-2022, Matthew Olsson <mattco@serenityos.org>
|
2021-04-30 18:33:13 -07:00
|
|
|
*
|
|
|
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include <AK/ScopeGuard.h>
|
2021-05-24 08:15:43 -07:00
|
|
|
#include <LibPDF/CommonNames.h>
|
2021-04-30 18:33:13 -07:00
|
|
|
#include <LibPDF/Document.h>
|
2021-05-22 20:44:18 -07:00
|
|
|
#include <LibPDF/Filter.h>
|
2021-04-30 18:33:13 -07:00
|
|
|
#include <LibPDF/Parser.h>
|
2021-05-23 21:27:17 -07:00
|
|
|
#include <LibTextCodec/Decoder.h>
|
2021-04-30 18:33:13 -07:00
|
|
|
#include <ctype.h>
|
|
|
|
|
|
|
|
|
|
namespace PDF {
|
|
|
|
|
|
2022-03-25 15:00:11 -07:00
|
|
|
// Parses `bytes` as a sequence of operators using a temporary Parser bound to `document`.
// The temporary parser has m_disable_encryption set, so parse_string() and parse_stream()
// do not run the document's security handler on what they read.
PDFErrorOr<Vector<Operator>> Parser::parse_operators(Document* document, ReadonlyBytes bytes)
{
    Parser operator_parser(document, bytes);
    operator_parser.m_disable_encryption = true;

    return operator_parser.parse_operators();
}
|
|
|
|
|
|
2022-03-22 19:22:45 -07:00
|
|
|
// Creates a parser that reads from `bytes` and is associated with `document`.
Parser::Parser(Document* document, ReadonlyBytes bytes)
    : m_reader(bytes)
    , m_document(document)
{
    // Nothing else to set up; all state lives in the member initializers.
}
|
|
|
|
|
|
2021-11-11 01:06:34 +01:00
|
|
|
// Creates a parser that reads from `bytes` without an associated document.
// A document can be attached afterwards with set_document().
Parser::Parser(ReadonlyBytes bytes)
    : m_reader(bytes)
{
    // m_document is intentionally left default-initialized here.
}
|
|
|
|
|
|
2021-11-28 11:52:48 +01:00
|
|
|
// Replaces the document associated with this parser.
void Parser::set_document(WeakPtr<Document> const& document)
{
    m_document = document;
}
|
|
|
|
|
|
2021-04-30 18:33:13 -07:00
|
|
|
// Consumes a '%' comment at the current position, if there is one.
//
// Returns the text between the '%' and the end of the line. When the reader is not positioned
// on a '%', nothing is consumed and a default-constructed String is returned.
// After a comment, the line terminator and any following whitespace are consumed as well.
String Parser::parse_comment()
{
    if (!m_reader.matches('%'))
        return {};

    // Skip the '%' itself so that it is not part of the returned text.
    m_reader.consume();

    auto const text_begin = m_reader.offset();
    m_reader.move_until([&](auto) { return m_reader.matches_eol(); });
    auto const text_length = m_reader.offset() - text_begin;

    String comment_text = StringView(m_reader.bytes().slice(text_begin, text_length));

    m_reader.consume_eol();
    m_reader.consume_whitespace();
    return comment_text;
}
|
|
|
|
|
|
2022-08-20 09:00:51 +02:00
|
|
|
// Parses a single PDF value at the current position and returns it.
//
// A leading comment is skipped first. The value kind is then chosen from the upcoming bytes:
// the keywords null/true/false, a number (optionally the start of an indirect value or a
// reference when `can_be_indirect_value` is CanBeIndirectValue::Yes), a name ('/'), a dictionary
// or stream ("<<"), a string ('(' or '<') or an array ('[').
//
// Returns a parse error if none of these match or if a nested parse fails.
PDFErrorOr<Value> Parser::parse_value(CanBeIndirectValue can_be_indirect_value)
{
    parse_comment();

    if (m_reader.matches("null")) {
        m_reader.move_by(4);
        m_reader.consume_whitespace();
        return Value(nullptr);
    }

    if (m_reader.matches("true")) {
        m_reader.move_by(4);
        m_reader.consume_whitespace();
        return Value(true);
    }

    if (m_reader.matches("false")) {
        m_reader.move_by(5);
        m_reader.consume_whitespace();
        return Value(false);
    }

    if (m_reader.matches_number()) {
        if (can_be_indirect_value == CanBeIndirectValue::Yes)
            return parse_possible_indirect_value_or_ref();
        else
            return parse_number();
    }

    // Propagate name errors to the caller like every other branch does, instead of aborting.
    if (m_reader.matches('/'))
        return TRY(parse_name());

    // "<<" has to be tested before the string branch below, which also accepts a single '<'.
    if (m_reader.matches("<<")) {
        auto dict = TRY(parse_dict());
        // A dictionary directly followed by the "stream" keyword is the header of a stream object.
        if (m_reader.matches("stream"))
            return TRY(parse_stream(dict));
        return dict;
    }

    if (m_reader.matches_any('(', '<'))
        return parse_string();

    if (m_reader.matches('['))
        return TRY(parse_array());

    return error(String::formatted("Unexpected char \"{}\"", m_reader.peek()));
}
|
|
|
|
|
|
2022-03-05 17:30:55 -07:00
|
|
|
// Parses a number that may be the start of "<index> <generation> R" (a reference) or
// "<index> <generation> obj ... endobj" (an indirect value).
//
// Returns the Reference or the parsed IndirectValue when one of those forms is present.
// Otherwise the reader is rewound to just after the first number and that number is returned.
PDFErrorOr<Value> Parser::parse_possible_indirect_value_or_ref()
{
    auto first_number = TRY(parse_number());
    if (!m_reader.matches_number())
        return first_number;

    // Remember the position after the first number so that we can back out of the lookahead.
    m_reader.save();
    auto second_number = parse_number();
    if (second_number.is_error()) {
        m_reader.load();
        return first_number;
    }

    // Object and generation numbers are integers. If either number is a real, this cannot be a
    // reference or an indirect value, and get<int>() below would assert, so treat it as a number.
    if (!first_number.has<int>() || !second_number.value().has<int>()) {
        m_reader.load();
        return first_number;
    }

    auto index = first_number.get<int>();
    auto generation = second_number.value().get<int>();

    if (m_reader.matches('R')) {
        m_reader.discard();
        m_reader.consume();
        m_reader.consume_whitespace();
        return Value(Reference(index, generation));
    }

    if (m_reader.matches("obj")) {
        m_reader.discard();
        VERIFY(index >= 0);
        VERIFY(generation >= 0);
        return TRY(parse_indirect_value(index, generation));
    }

    // Two plain numbers in a row: only the first one belongs to this value.
    m_reader.load();
    return first_number;
}
|
|
|
|
|
|
2022-03-21 11:26:31 -07:00
|
|
|
// Parses the "obj <value> endobj" part of an indirect value whose object number (`index`) and
// generation number have already been read by the caller.
//
// While the contained value is parsed, { index, generation } is on the reference stack, which
// parse_string() and parse_stream() pass to the security handler.
//
// Returns a parse error if the "obj" or "endobj" keyword is missing or the value fails to parse.
PDFErrorOr<NonnullRefPtr<IndirectValue>> Parser::parse_indirect_value(u32 index, u32 generation)
{
    if (!m_reader.matches("obj"))
        return error("Expected \"obj\" at beginning of indirect value");
    m_reader.move_by(3);
    m_reader.consume_whitespace();

    push_reference({ index, generation });
    // Pop on every exit path, including the error returns below, so that a failed parse does not
    // leave a stale reference on the stack.
    ScopeGuard pop_reference_on_exit([&] { pop_reference(); });

    auto value = TRY(parse_value());
    if (!m_reader.matches("endobj"))
        return error("Expected \"endobj\" at end of indirect value");

    m_reader.consume(6);
    m_reader.consume_whitespace();

    return make_object<IndirectValue>(index, generation, value);
}
|
|
|
|
|
|
2022-03-05 17:30:55 -07:00
|
|
|
// Parses a complete indirect value: "<index> <generation> obj <value> endobj".
//
// Returns a parse error if either number is missing, is not an integer, or is negative, or if
// the remainder of the indirect value fails to parse.
PDFErrorOr<NonnullRefPtr<IndirectValue>> Parser::parse_indirect_value()
{
    auto first_number = TRY(parse_number());
    auto second_number = TRY(parse_number());

    // parse_number() may yield a real; get<int>() would assert on it, so reject it explicitly.
    if (!first_number.has<int>() || !second_number.has<int>())
        return error("Expected integer object and generation numbers in indirect value");

    auto index = first_number.get<int>();
    auto generation = second_number.get<int>();
    if (index < 0 || generation < 0)
        return error("Expected non-negative object and generation numbers in indirect value");

    return parse_indirect_value(index, generation);
}
|
|
|
|
|
|
2022-03-05 17:30:55 -07:00
|
|
|
// Parses an integer or real number: an optional sign followed by digits with at most one '.'.
//
// Returns a float Value if a '.' was seen and an int Value otherwise. Whitespace following the
// number is consumed. Returns a parse error if no digit was found or if an integer does not fit
// into an int.
PDFErrorOr<Value> Parser::parse_number()
{
    size_t start_offset = m_reader.offset();
    bool is_float = false;
    bool consumed_digit = false;

    if (m_reader.matches('+') || m_reader.matches('-'))
        m_reader.consume();

    while (!m_reader.done()) {
        if (m_reader.matches('.')) {
            // A second '.' cannot belong to this number.
            if (is_float)
                break;
            is_float = true;
            m_reader.consume();
        } else if (isdigit(static_cast<unsigned char>(m_reader.peek()))) {
            // The cast keeps bytes >= 0x80 from reaching isdigit() as negative values.
            m_reader.consume();
            consumed_digit = true;
        } else {
            break;
        }
    }

    if (!consumed_digit)
        return error("Invalid number");

    // Take the number's text before skipping the whitespace that follows it.
    auto string = String(m_reader.bytes().slice(start_offset, m_reader.offset() - start_offset));
    m_reader.consume_whitespace();

    if (is_float)
        return Value(strtof(string.characters(), nullptr));

    // Integers are converted directly: going through float would silently round every value
    // whose magnitude exceeds 2^24.
    long parsed = strtol(string.characters(), nullptr, 10);
    int narrowed = static_cast<int>(parsed);
    if (static_cast<long>(narrowed) != parsed)
        return error("Invalid number");

    return Value(narrowed);
}
|
|
|
|
|
|
2022-03-05 17:30:55 -07:00
|
|
|
// Parses a name object: a '/' followed by regular characters, where "#xx" (two hexadecimal
// digits) stands for the byte with that value.
//
// Whitespace after the name is consumed. Returns a parse error if the name does not start with
// '/' or if a '#' is not followed by two hexadecimal digits.
PDFErrorOr<NonnullRefPtr<NameObject>> Parser::parse_name()
{
    if (!m_reader.consume('/'))
        return error("Expected Name object to start with \"/\"");

    StringBuilder builder;

    while (true) {
        if (!m_reader.matches_regular_character())
            break;

        if (m_reader.matches('#')) {
            m_reader.consume();
            int hex_value = 0;
            for (int i = 0; i < 2; i++) {
                auto ch = m_reader.consume();
                if (!isxdigit(static_cast<unsigned char>(ch)))
                    return error("Expected two hexadecimal digits after \"#\" in Name object");

                hex_value *= 16;
                // Accept both upper- and lowercase digits, matching parse_hex_string().
                if (ch <= '9') {
                    hex_value += ch - '0';
                } else if (ch >= 'A' && ch <= 'F') {
                    hex_value += ch - 'A' + 10;
                } else {
                    hex_value += ch - 'a' + 10;
                }
            }
            builder.append(static_cast<char>(hex_value));
            continue;
        }

        builder.append(m_reader.consume());
    }

    m_reader.consume_whitespace();

    return make_object<NameObject>(builder.to_string());
}
|
|
|
|
|
|
2022-03-05 17:30:55 -07:00
|
|
|
// Parses a literal "(...)" or hexadecimal "<...>" string into a StringObject.
//
// Unless m_disable_encryption is set, the string is run through the document's security handler
// (when it has one) using the innermost reference on the reference stack. A UTF-16BE byte order
// mark causes the contents to be converted to UTF-8, and a UTF-8 byte order mark is stripped.
// Whitespace following the string is consumed when the function returns.
NonnullRefPtr<StringObject> Parser::parse_string()
{
    ScopeGuard guard([&] { m_reader.consume_whitespace(); });

    // Anything that does not open with '(' is handed to the hex string parser.
    bool const is_hex_string = !m_reader.matches('(');
    String raw_string = is_hex_string ? parse_hex_string() : parse_literal_string();

    VERIFY(!raw_string.is_null());

    auto string_object = make_object<StringObject>(raw_string, is_hex_string);

    if (m_document->security_handler() && !m_disable_encryption)
        m_document->security_handler()->decrypt(string_object, m_current_reference_stack.last());

    auto unencrypted_string = string_object->string();
    auto const leading_bytes = unencrypted_string.bytes();

    if (leading_bytes.starts_with(Array<u8, 2> { 0xfe, 0xff })) {
        // Byte order mark of UTF-16BE: convert the contents to UTF-8.
        string_object->set_string(TextCodec::decoder_for("utf-16be")->to_utf8(unencrypted_string));
    } else if (leading_bytes.starts_with(Array<u8, 3> { 0xef, 0xbb, 0xbf })) {
        // Byte order mark of UTF-8: the encoding is already what we want, so only drop the mark.
        string_object->set_string(unencrypted_string.substring(3));
    }

    return string_object;
}
|
|
|
|
|
|
|
|
|
|
// Parses a literal string "(...)" and returns its contents with escape sequences resolved.
//
// - Balanced unescaped parentheses are part of the string.
// - A backslash followed by a line terminator continues the string on the next line.
// - \n, \r, \t, \b, \f, \(, \) and \\ produce the corresponding character.
// - A backslash followed by one to three octal digits produces the byte with that value.
// - A backslash followed by any other character produces that character.
// - An unescaped line terminator is stored as '\n'.
//
// Returns a default-constructed String if the input ends directly after a backslash.
String Parser::parse_literal_string()
{
    VERIFY(m_reader.consume('('));
    StringBuilder builder;
    auto opened_parens = 0;

    while (true) {
        if (m_reader.matches('(')) {
            opened_parens++;
            builder.append(m_reader.consume());
        } else if (m_reader.matches(')')) {
            m_reader.consume();
            // The ')' that balances the opening '(' of the string itself ends it.
            if (opened_parens == 0)
                break;
            opened_parens--;
            builder.append(')');
        } else if (m_reader.matches('\\')) {
            m_reader.consume();
            if (m_reader.matches_eol()) {
                m_reader.consume_eol();
                continue;
            }

            if (m_reader.done())
                return {};

            auto ch = m_reader.consume();
            switch (ch) {
            case 'n':
                builder.append('\n');
                break;
            case 'r':
                builder.append('\r');
                break;
            case 't':
                builder.append('\t');
                break;
            case 'b':
                builder.append('\b');
                break;
            case 'f':
                builder.append('\f');
                break;
            case '(':
                builder.append('(');
                break;
            case ')':
                builder.append(')');
                break;
            case '\\':
                builder.append('\\');
                break;
            default: {
                if (ch >= '0' && ch <= '7') {
                    int octal_value = ch - '0';
                    // Up to two more octal digits may follow. Look before consuming, so that the
                    // first non-octal character (possibly the closing ')') stays in the input
                    // instead of being dropped.
                    for (int i = 0; i < 2 && !m_reader.done(); i++) {
                        auto octal_ch = m_reader.peek();
                        if (octal_ch < '0' || octal_ch > '7')
                            break;
                        m_reader.consume();
                        octal_value = octal_value * 8 + (octal_ch - '0');
                    }
                    builder.append(static_cast<char>(octal_value));
                } else {
                    builder.append(ch);
                }
            }
            }
        } else if (m_reader.matches_eol()) {
            m_reader.consume_eol();
            builder.append('\n');
        } else {
            builder.append(m_reader.consume());
        }
    }

    return builder.to_string();
}
|
|
|
|
|
|
|
|
|
|
// Parses a hexadecimal string "<...>" and returns the bytes it encodes.
//
// Each pair of hexadecimal digits (upper- or lowercase) yields one byte. Whitespace between
// digits is skipped. If the number of digits is odd, the missing last digit is taken to be '0'.
// Asserts if a character other than a hexadecimal digit, whitespace or '>' is encountered.
String Parser::parse_hex_string()
{
    VERIFY(m_reader.consume('<'));

    StringBuilder builder;

    while (true) {
        m_reader.consume_whitespace();
        if (m_reader.matches('>')) {
            m_reader.consume();
            return builder.to_string();
        }

        int hex_value = 0;

        for (int i = 0; i < 2; i++) {
            m_reader.consume_whitespace();
            auto ch = m_reader.consume();
            if (ch == '>') {
                // The hex string contains an odd number of characters, and the last character
                // is assumed to be '0'. The '>' has already been consumed just above, so nothing
                // more may be consumed here.
                hex_value *= 16;
                builder.append(static_cast<char>(hex_value));
                return builder.to_string();
            }

            VERIFY(isxdigit(static_cast<unsigned char>(ch)));

            hex_value *= 16;
            if (ch <= '9') {
                hex_value += ch - '0';
            } else if (ch >= 'A' && ch <= 'F') {
                hex_value += ch - 'A' + 10;
            } else {
                hex_value += ch - 'a' + 10;
            }
        }

        builder.append(static_cast<char>(hex_value));
    }
}
|
|
|
|
|
|
2022-03-05 17:30:55 -07:00
|
|
|
// Parses an array "[ value value ... ]" into an ArrayObject.
//
// Whitespace after the closing ']' is consumed. Returns a parse error if the array does not
// start with '[', if an element fails to parse, or if the input ends before the closing ']'.
PDFErrorOr<NonnullRefPtr<ArrayObject>> Parser::parse_array()
{
    if (!m_reader.consume('['))
        return error("Expected array to start with \"[\"");
    m_reader.consume_whitespace();
    Vector<Value> values;

    // Stop at the end of the input as well, like parse_dict() does, instead of trying to parse
    // a value where there is nothing left to read.
    while (!m_reader.done() && !m_reader.matches(']'))
        values.append(TRY(parse_value()));

    if (m_reader.done() || !m_reader.consume(']'))
        return error("Expected array to end with \"]\"");
    m_reader.consume_whitespace();

    return make_object<ArrayObject>(values);
}
|
|
|
|
|
|
2022-03-05 17:30:55 -07:00
|
|
|
// Parses a dictionary "<< /Key value ... >>" into a DictObject.
//
// Every entry is a name followed by a value; a later entry with the same name replaces the
// earlier one. Whitespace after the closing ">>" is consumed. Returns a parse error if the
// delimiters are missing or if a key or value fails to parse.
PDFErrorOr<NonnullRefPtr<DictObject>> Parser::parse_dict()
{
    if (!m_reader.consume('<') || !m_reader.consume('<'))
        return error("Expected dict to start with \"<<\"");

    m_reader.consume_whitespace();
    HashMap<FlyString, Value> map;

    // Read key/value pairs until the closing delimiter or the end of the input is reached.
    while (!m_reader.done() && !m_reader.matches(">>")) {
        auto key = TRY(parse_name())->name();
        auto entry = TRY(parse_value());
        map.set(key, entry);
    }

    if (!m_reader.consume('>') || !m_reader.consume('>'))
        return error("Expected dict to end with \">>\"");
    m_reader.consume_whitespace();

    return make_object<DictObject>(map);
}
|
|
|
|
|
|
2022-03-05 17:30:55 -07:00
|
|
|
// Parses the "stream ... endstream" part of a stream object whose dictionary `dict` has
// already been parsed, and returns the resulting StreamObject.
//
// The extent of the data is taken from the dictionary's Length entry when that entry is present
// and is not a reference; otherwise the data ends at the line terminator preceding the first
// line that starts with "endstream".
// Unless m_disable_encryption is set, the data is run through the document's security handler
// (when it has one). Afterwards the filters named by the Filter entry are applied in order, each
// with its own DecodeParms dictionary if any were given.
//
// Returns a parse error if the keywords are missing, the length does not fit into the remaining
// input, or resolving an entry or decoding a filter fails.
PDFErrorOr<NonnullRefPtr<StreamObject>> Parser::parse_stream(NonnullRefPtr<DictObject> dict)
{
    if (!m_reader.matches("stream"))
        return error("Expected stream to start with \"stream\"");
    m_reader.move_by(6);
    if (!m_reader.consume_eol())
        return error("Expected \"stream\" to be followed by a newline");

    ReadonlyBytes bytes;

    auto maybe_length = dict->get(CommonNames::Length);
    if (maybe_length.has_value() && (!maybe_length->has<Reference>())) {
        // The PDF writer has kindly provided us with the direct length of the stream
        m_reader.save();
        auto length = TRY(m_document->resolve_to<int>(maybe_length.value()));
        m_reader.load();

        // A negative or too large length would make slice() assert; report it instead.
        auto remaining = m_reader.bytes().size() - m_reader.offset();
        if (length < 0 || static_cast<size_t>(length) > remaining)
            return error("Invalid stream length");

        bytes = m_reader.bytes().slice(m_reader.offset(), length);
        m_reader.move_by(length);
        m_reader.consume_whitespace();
    } else {
        // We have to look for the endstream keyword
        auto stream_start = m_reader.offset();

        while (true) {
            m_reader.move_until([&](auto) { return m_reader.matches_eol(); });
            auto potential_stream_end = m_reader.offset();
            m_reader.consume_eol();
            if (m_reader.matches("endstream")) {
                bytes = m_reader.bytes().slice(stream_start, potential_stream_end - stream_start);
                break;
            }

            // Without this check a truncated file would keep us looping at the end of the input.
            if (m_reader.done())
                return error("Expected stream to end with \"endstream\"");
        }
    }

    // Make sure we really are at the keyword before skipping over it; a wrong Length entry
    // would otherwise make us skip nine arbitrary bytes.
    if (!m_reader.matches("endstream"))
        return error("Expected stream to end with \"endstream\"");
    m_reader.move_by(9);
    m_reader.consume_whitespace();

    auto stream_object = make_object<StreamObject>(dict, MUST(ByteBuffer::copy(bytes)));

    if (m_document->security_handler() && !m_disable_encryption)
        m_document->security_handler()->decrypt(stream_object, m_current_reference_stack.last());

    if (dict->contains(CommonNames::Filter)) {
        Vector<FlyString> filters;

        // We may either get a single filter or an array of cascading filters
        auto filter_object = TRY(dict->get_object(m_document, CommonNames::Filter));
        if (filter_object->is<ArrayObject>()) {
            auto filter_array = filter_object->cast<ArrayObject>();
            for (size_t i = 0; i < filter_array->size(); ++i)
                filters.append(TRY(filter_array->get_name_at(m_document, i))->name());
        } else {
            filters.append(filter_object->cast<NameObject>()->name());
        }

        // Every filter may get its own parameter dictionary
        Vector<RefPtr<DictObject>> decode_parms_vector;
        RefPtr<Object> decode_parms_object;
        if (dict->contains(CommonNames::DecodeParms)) {
            decode_parms_object = TRY(dict->get_object(m_document, CommonNames::DecodeParms));
            if (decode_parms_object->is<ArrayObject>()) {
                auto decode_parms_array = decode_parms_object->cast<ArrayObject>();
                for (size_t i = 0; i < decode_parms_array->size(); ++i) {
                    // FIXME: This entry may be the null object instead
                    RefPtr<DictObject> decode_parms = decode_parms_array->at(i).get<NonnullRefPtr<Object>>()->cast<DictObject>();
                    decode_parms_vector.append(decode_parms);
                }
            } else {
                decode_parms_vector.append(decode_parms_object->cast<DictObject>());
            }
        }

        // Either no parameters were given at all, or there is exactly one entry per filter.
        VERIFY(decode_parms_vector.is_empty() || decode_parms_vector.size() == filters.size());

        // Apply the filters in the order listed; each one decodes the output of the previous one.
        for (size_t i = 0; i < filters.size(); ++i) {
            RefPtr<DictObject> decode_parms;
            if (!decode_parms_vector.is_empty())
                decode_parms = decode_parms_vector.at(i);

            stream_object->buffer() = TRY(Filter::decode(stream_object->bytes(), filters.at(i), decode_parms));
        }
    }

    return stream_object;
}
|
|
|
|
|
|
2022-03-25 15:00:11 -07:00
|
|
|
// Parses the reader's contents as a sequence of operators, each written as its arguments
// followed by the operator symbol.
//
// Values are collected as arguments until an operator symbol (letters, '*' and '\'') is read;
// the symbol and the arguments collected so far then form one Operator.
// Returns a parse error if an argument fails to parse.
PDFErrorOr<Vector<Operator>> Parser::parse_operators()
{
    Vector<Operator> operators;
    Vector<Value> operator_args;

    constexpr static auto is_operator_char = [](char ch) {
        // isalpha() must not be given a negative value, which a plain char holding a byte
        // >= 0x80 would be on platforms where char is signed.
        return isalpha(static_cast<unsigned char>(ch)) || ch == '*' || ch == '\'';
    };

    m_reader.consume_whitespace();

    while (!m_reader.done()) {
        auto ch = m_reader.peek();
        if (is_operator_char(ch)) {
            auto operator_start = m_reader.offset();
            while (is_operator_char(ch)) {
                m_reader.consume();
                if (m_reader.done())
                    break;
                ch = m_reader.peek();
            }

            auto operator_string = StringView(m_reader.bytes().slice(operator_start, m_reader.offset() - operator_start));
            auto operator_type = Operator::operator_type_from_symbol(operator_string);
            operators.append(Operator(operator_type, move(operator_args)));
            // Start over with a fresh argument list for the next operator.
            operator_args = Vector<Value>();
            m_reader.consume_whitespace();

            continue;
        }

        // Note: We disallow parsing indirect values here, since
        //       operations like 0 0 0 RG would confuse the parser
        auto v = TRY(parse_value(CanBeIndirectValue::No));
        operator_args.append(v);
    }

    return operators;
}
|
|
|
|
|
|
2022-03-05 17:30:55 -07:00
|
|
|
Error Parser::error(
|
|
|
|
|
String const& message
|
|
|
|
|
#ifdef PDF_DEBUG
|
|
|
|
|
,
|
|
|
|
|
SourceLocation loc
|
|
|
|
|
#endif
|
|
|
|
|
) const
|
|
|
|
|
{
|
|
|
|
|
#ifdef PDF_DEBUG
|
|
|
|
|
dbgln("\033[31m{} Parser error at offset {}: {}\033[0m", loc, m_reader.offset(), message);
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
return Error { Error::Type::Parse, message };
|
|
|
|
|
}
|
|
|
|
|
|
2021-04-30 18:33:13 -07:00
|
|
|
}
|