2021-04-30 18:23:17 -07:00
|
|
|
/*
|
2022-03-05 17:30:55 -07:00
|
|
|
* Copyright (c) 2021-2022, Matthew Olsson <mattco@serenityos.org>
|
2021-04-30 18:23:17 -07:00
|
|
|
*
|
|
|
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include <AK/Hex.h>
|
2024-02-05 19:36:35 -05:00
|
|
|
#include <LibPDF/CommonNames.h>
|
2021-05-08 14:57:49 -07:00
|
|
|
#include <LibPDF/Document.h>
|
2021-09-17 02:28:52 +02:00
|
|
|
#include <LibPDF/ObjectDerivatives.h>
|
2021-04-30 18:23:17 -07:00
|
|
|
|
|
|
|
|
namespace PDF {
|
|
|
|
|
|
LibPDF: Add more utility methods to {Dict,Array}Object
Since both of these classes are containers, they already offered a set of
methods to retrieve an inner element by key or index, respectively, with
separate methods for the different subtypes of the PDF::Object type, each
returning the element cast to the correct pointer type. On top of
that, DictObject offered an additional method to obtain an element as an
Object pointer.
While these methods were useful, they have some shortcomings:
* They always take a Document pointer to first perform an object
resolution, in case the element is a Reference. This is not always
necessary though, as there are values that are always meant to be
immediate, and hence the resolution lookup adds overhead.
* There was no easy way to get an individual Object element from an
ArrayObject like there is in DictObject. This makes it difficult to
obtain such values, as one first needs to call dict.get() to get a
Value, then cast it manually to a NonnullRefPtr<Object>.
This commit fixes these two issues by:
* Adding a new method that returns an Object for a given index.
* Adding overloads for this new method, and all the existing methods
described above, that do *not* take a Document, and therefore do
*not* perform an object resolution lookup.
2023-01-06 00:19:12 +08:00
|
|
|
// Returns the element at `index` as an Object, passing it through
// document->resolve_to<Object>() first.
//
// An out-of-range `index` is reported as an Internal error rather than being
// forwarded to at(); this matches the Document-taking get_*_at() accessors
// generated by DEFINE_ACCESSORS, so callers can TRY() all of them uniformly.
PDFErrorOr<NonnullRefPtr<Object>> ArrayObject::get_object_at(Document* document, size_t index) const
{
    if (index >= m_elements.size())
        return Error { Error::Type::Internal, "Out of bounds array access" };
    return document->resolve_to<Object>(at(index));
}
|
|
|
|
|
|
2023-01-08 19:23:00 -05:00
|
|
|
// Fetches the value stored under `key` and hands it to
// document->resolve_to<Object>(), returning whatever that call produces.
PDFErrorOr<NonnullRefPtr<Object>> DictObject::get_object(Document* document, DeprecatedFlyString const& key) const
{
    auto const& entry = get_value(key);
    return document->resolve_to<Object>(entry);
}
|
|
|
|
|
|
2023-01-08 19:23:00 -05:00
|
|
|
// Stamps out the typed element accessors of ArrayObject and DictObject for a
// single object type. `class_name` is the C++ type the accessor yields and
// `snake_name` is the fragment spliced into the generated method names
// (get_<snake_name>_at for arrays, get_<snake_name> for dictionaries).
//
// Each accessor comes in two flavours:
//  * the Document-taking overload goes through document->resolve_to<>() and
//    returns a PDFErrorOr (the array variant reports an out-of-range index as
//    an Internal error);
//  * the overload without a Document casts the stored value directly with
//    cast_to<>() (the array variant VERIFY()s the index instead).
#define DEFINE_ACCESSORS(class_name, snake_name) \
    PDFErrorOr<NonnullRefPtr<class_name>> ArrayObject::get_##snake_name##_at(Document* document, size_t index) const \
    { \
        if (index < m_elements.size()) \
            return document->resolve_to<class_name>(m_elements[index]); \
        return Error { Error::Type::Internal, "Out of bounds array access" }; \
    } \
    \
    NonnullRefPtr<class_name> ArrayObject::get_##snake_name##_at(size_t index) const \
    { \
        VERIFY(index < m_elements.size()); \
        auto const& element = m_elements[index]; \
        return cast_to<class_name>(element); \
    } \
    \
    PDFErrorOr<NonnullRefPtr<class_name>> DictObject::get_##snake_name(Document* document, DeprecatedFlyString const& key) const \
    { \
        auto const& entry = get_value(key); \
        return document->resolve_to<class_name>(entry); \
    } \
    \
    NonnullRefPtr<class_name> DictObject::get_##snake_name(DeprecatedFlyString const& key) const \
    { \
        auto const& entry = get_value(key); \
        return cast_to<class_name>(entry); \
    }
|
LibPDF: Add more utility methods to {Dict,Array}Object
Since both of these classes are containers, they already offered a set of
methods to retrieve an inner element by key or index, respectively, with
separate methods for the different subtypes of the PDF::Object type, each
returning the element cast to the correct pointer type. On top of
that, DictObject offered an additional method to obtain an element as an
Object pointer.
While these methods were useful, they have some shortcomings:
* They always take a Document pointer to first perform an object
resolution, in case the element is a Reference. This is not always
necessary though, as there are values that are always meant to be
immediate, and hence the resolution lookup adds overhead.
* There was no easy way to get an individual Object element from an
ArrayObject like there is in DictObject. This makes it difficult to
obtain such values, as one first needs to call dict.get() to get a
Value, then cast it manually to a NonnullRefPtr<Object>.
This commit fixes these two issues by:
* Adding a new method that returns an Object for a given index.
* Adding overloads for this new method, and all the existing methods
described above, that do *not* take a Document, and therefore do
*not* perform an object resolution lookup.
2023-01-06 00:19:12 +08:00
|
|
|
|
2021-05-23 16:12:25 -07:00
|
|
|
// Expand DEFINE_ACCESSORS once per entry supplied by ENUMERATE_OBJECT_TYPES,
// then retire the helper macro so it does not leak into the rest of the file.
// (The name undefined here must be the one that was actually defined above.)
ENUMERATE_OBJECT_TYPES(DEFINE_ACCESSORS)
#undef DEFINE_ACCESSORS
|
|
|
|
|
|
2021-04-30 18:23:17 -07:00
|
|
|
// Appends the indentation unit to `builder` once per indent level.
// A zero or negative `indent` appends nothing.
static void append_indent(StringBuilder& builder, int indent)
{
    for (int remaining = indent; remaining > 0; --remaining)
        builder.append(" "sv);
}
|
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Renders the string for debug output: "(...)" around the raw text for
// non-binary strings, "<...>" around the upper-cased hex encoding of the bytes
// for binary ones. The indent argument is unused.
ByteString StringObject::to_byte_string(int) const
{
    if (!is_binary())
        return ByteString::formatted("({})", string());

    auto const hex_digits = encode_hex(string().bytes()).to_uppercase();
    return ByteString::formatted("<{}>", hex_digits);
}
|
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Renders the name prefixed with a slash ("/Name"). The indent argument is
// unused.
ByteString NameObject::to_byte_string(int) const
{
    return ByteString::formatted("/{}", name());
}
|
|
|
|
|
|
2022-11-25 16:34:06 +08:00
|
|
|
// Builds a vector holding to_float() of every element, in element order.
Vector<float> ArrayObject::float_elements() const
{
    auto const count = m_elements.size();

    Vector<float> floats;
    floats.ensure_capacity(count);
    for (size_t i = 0; i < count; ++i)
        floats.append(m_elements[i].to_float());
    return floats;
}
|
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Renders the array as a multi-line "[ ... ]" listing: one element per line,
// each prefixed with indent + 1 levels of indentation, and the closing bracket
// on its own line at `indent` levels.
ByteString ArrayObject::to_byte_string(int indent) const
{
    StringBuilder out;
    out.append("[\n"sv);

    auto const& items = elements();
    for (size_t i = 0; i < items.size(); ++i) {
        // Separator goes between elements, never before the first one.
        if (i != 0)
            out.append("\n"sv);
        append_indent(out, indent + 1);
        // Note: the element itself is rendered with `indent`, not indent + 1.
        out.append(items[i].to_byte_string(indent));
    }

    out.append('\n');
    append_indent(out, indent);
    out.append(']');
    return out.to_byte_string();
}
|
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Renders the dictionary as a multi-line "<< ... >>" listing. The opening
// marker is itself indented by `indent` levels; every "/Key value" entry sits
// on its own line at indent + 1 levels, with the value rendered at indent + 1
// as well. The closing marker is placed on its own line at `indent` levels.
ByteString DictObject::to_byte_string(int indent) const
{
    auto const entry_indent = indent + 1;

    StringBuilder out;
    append_indent(out, indent);
    out.append("<<\n"sv);

    bool needs_separator = false;
    for (auto const& [key, value] : map()) {
        if (needs_separator)
            out.append("\n"sv);
        needs_separator = true;

        append_indent(out, entry_indent);
        out.appendff("/{} {}", key, value.to_byte_string(entry_indent));
    }

    out.append('\n');
    append_indent(out, indent);
    out.append(">>"sv);
    return out.to_byte_string();
}
|
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Renders the stream for debug output: the stream dictionary, a "stream" line,
// a printable view of the payload, and a closing "endstream".
//
// The payload is shown in one of two ways:
//  * "mostly text" (more than 95% of the bytes are below 128, and the dict's
//    Subtype is not "Image"): bytes are emitted as-is, except that a lone '\r'
//    becomes '\n' and bytes >= 128 are written as a backslash followed by
//    three octal digits;
//  * otherwise: a hex dump wrapped at `chars_per_line` characters and capped
//    at `max_lines_to_print` lines, followed by a note on how many bytes were
//    left out.
ByteString StreamObject::to_byte_string(int indent) const
{
    StringBuilder builder;
    builder.appendff("{}\n", dict()->to_byte_string(indent));
    builder.append("stream\n"sv);

    size_t ascii_count = 0;
    for (auto c : bytes()) {
        if (c < 128)
            ++ascii_count;
    }

    // An empty stream counts as 100% ASCII, which also avoids dividing by zero.
    size_t percentage_ascii = 100;
    if (bytes().size())
        percentage_ascii = ascii_count * 100 / bytes().size();
    bool is_mostly_text = percentage_ascii > 95;

    // Streams whose Subtype is "Image" are always hex-dumped, regardless of
    // how many of their bytes are below 128.
    if (dict()->contains(CommonNames::Subtype) && dict()->get_name(CommonNames::Subtype)->name() == "Image")
        is_mostly_text = false;

    if (is_mostly_text) {
        for (size_t i = 0; i < bytes().size(); ++i) {
            auto c = bytes()[i];
            if (c < 128) {
                // A "\r\n" pair is passed through untouched; only a bare '\r'
                // is rewritten to '\n'.
                bool next_is_newline = i + 1 < bytes().size() && bytes()[i + 1] == '\n';
                if (c == '\r' && !next_is_newline)
                    builder.append('\n');
                else
                    builder.append(c);
            } else {
                builder.appendff("\\{:03o}", c);
            }
        }
    } else {
        // Kept as size_t so they compare cleanly against lengths and sizes.
        constexpr size_t chars_per_line = 60;
        constexpr size_t bytes_per_line = chars_per_line / 2; // two hex digits per byte
        constexpr size_t max_lines_to_print = 10;
        constexpr size_t max_bytes_to_print = max_lines_to_print * bytes_per_line;

        auto string = encode_hex(bytes().trim(max_bytes_to_print));
        StringView view { string };
        // Emit full lines while more than one line's worth remains; the final
        // (possibly full, possibly empty) remainder is written after the loop.
        while (view.length() > chars_per_line) {
            builder.appendff("{}\n", view.substring_view(0, chars_per_line));
            append_indent(builder, indent);
            view = view.substring_view(chars_per_line);
        }
        builder.appendff("{}\n", view);

        if (bytes().size() > max_bytes_to_print)
            builder.appendff("... (and {} more bytes)\n", bytes().size() - max_bytes_to_print);
    }

    builder.append("endstream"sv);
    return builder.to_byte_string();
}
|
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Renders the indirect object as
//   "<index> <generation> obj"
//   <wrapped value, at indent + 1>
//   "endobj"
// with the closing keyword at `indent` levels of indentation.
ByteString IndirectValue::to_byte_string(int indent) const
{
    auto const inner_indent = indent + 1;

    StringBuilder out;
    out.appendff("{} {} obj\n", index(), generation_index());

    append_indent(out, inner_indent);
    out.append(value().to_byte_string(inner_indent));
    out.append('\n');

    append_indent(out, indent);
    out.append("endobj"sv);
    return out.to_byte_string();
}
|
|
|
|
|
|
|
|
|
|
}
|