2021-04-30 18:23:17 -07:00
|
|
|
/*
|
2022-03-05 17:30:55 -07:00
|
|
|
* Copyright (c) 2021-2022, Matthew Olsson <mattco@serenityos.org>
|
2021-04-30 18:23:17 -07:00
|
|
|
*
|
|
|
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include <AK/Hex.h>
|
2021-05-08 14:57:49 -07:00
|
|
|
#include <LibPDF/Document.h>
|
2021-09-17 02:28:52 +02:00
|
|
|
#include <LibPDF/ObjectDerivatives.h>
|
2021-04-30 18:23:17 -07:00
|
|
|
|
|
|
|
|
namespace PDF {
|
|
|
|
|
|
LibPDF: Add more utility methods to {Dict,Array}Object
Since both of them are containers, these classes already offered a set of
methods to retrieve an inner element by key or index, respectively, with
different methods for the different subtypes of the PDF::Object type
returning the element cast to the correct type pointer. On top of
that, DictObject offered an additional method to obtain an element as an
Object pointer.
While these methods were useful, they have some shortcomings:
* They always take a Document pointer to first perform an object
resolution, in case the element is a Reference. This is not always
necessary though, as there are values that are always meant to be
immediate, and hence the resolution lookup adds overhead.
* There was no easy way to get an individual Object element from an
ArrayObject like there is in DictObject. This makes it difficult to
obtain such values, as one first needs to call dict.get() to get a
Value, then cast it manually to a NonnullRefPtr<Object>.
This commit fixes these two issues by:
* Adding a new method that returns an Object for a given index.
* Adding overloads for this new method, and all the existing methods
described above, that do *not* take a Document, and therefore do
*not* perform an object resolution lookup.
2023-01-06 00:19:12 +08:00
|
|
|
// Returns the element at `index` as an Object, resolving it through `document`
// first (the element may be stored as a reference rather than an immediate value).
//
// An out-of-range `index` is reported as an internal error instead of being
// passed on to the element lookup, matching the typed
// get_*_at(Document*, size_t) accessors generated by DEFINE_ACCESSORS.
PDFErrorOr<NonnullRefPtr<Object>> ArrayObject::get_object_at(Document* document, size_t index) const
{
    if (index >= m_elements.size())
        return Error { Error::Type::Internal, "Out of bounds array access" };
    return document->resolve_to<Object>(at(index));
}
|
|
|
|
|
|
2023-01-08 19:23:00 -05:00
|
|
|
// Looks up the value stored under `key` and resolves it through `document`,
// returning the result as an Object.
PDFErrorOr<NonnullRefPtr<Object>> DictObject::get_object(Document* document, DeprecatedFlyString const& key) const
{
    auto const& entry = get_value(key);
    return document->resolve_to<Object>(entry);
}
|
|
|
|
|
|
2023-01-08 19:23:00 -05:00
|
|
|
// Expands to the four typed accessors for one object type:
//  - ArrayObject::get_<snake_name>_at(Document*, size_t): resolves the element
//    through the document; an out-of-range index yields an internal error.
//  - ArrayObject::get_<snake_name>_at(size_t): casts the stored element
//    directly, without a document lookup; the index is VERIFY()-checked.
//  - DictObject::get_<snake_name>(Document*, key): resolves the value stored
//    under the key through the document.
//  - DictObject::get_<snake_name>(key): casts the value stored under the key
//    directly, without a document lookup.
#define DEFINE_ACCESSORS(class_name, snake_name)                                                                                  \
    PDFErrorOr<NonnullRefPtr<class_name>> ArrayObject::get_##snake_name##_at(Document* document, size_t index) const             \
    {                                                                                                                             \
        if (index < m_elements.size())                                                                                            \
            return document->resolve_to<class_name>(m_elements[index]);                                                           \
        return Error { Error::Type::Internal, "Out of bounds array access" };                                                     \
    }                                                                                                                             \
                                                                                                                                  \
    NonnullRefPtr<class_name> ArrayObject::get_##snake_name##_at(size_t index) const                                              \
    {                                                                                                                             \
        VERIFY(index < m_elements.size());                                                                                        \
        auto const& element = m_elements[index];                                                                                  \
        return cast_to<class_name>(element);                                                                                      \
    }                                                                                                                             \
                                                                                                                                  \
    PDFErrorOr<NonnullRefPtr<class_name>> DictObject::get_##snake_name(Document* document, DeprecatedFlyString const& key) const \
    {                                                                                                                             \
        auto const& entry = get_value(key);                                                                                       \
        return document->resolve_to<class_name>(entry);                                                                           \
    }                                                                                                                             \
                                                                                                                                  \
    NonnullRefPtr<class_name> DictObject::get_##snake_name(DeprecatedFlyString const& key) const                                  \
    {                                                                                                                             \
        auto const& entry = get_value(key);                                                                                       \
        return cast_to<class_name>(entry);                                                                                        \
    }
|
LibPDF: Add more utility methods to {Dict,Array}Object
Since both of them are containers, these classes already offered a set of
methods to retrieve an inner element by key or index, respectively, with
different methods for the different subtypes of the PDF::Object type
returning the element cast to the correct type pointer. On top of
that, DictObject offered an additional method to obtain an element as an
Object pointer.
While these methods were useful, they have some shortcomings:
* They always take a Document pointer to first perform an object
resolution, in case the element is a Reference. This is not always
necessary though, as there are values that are always meant to be
immediate, and hence the resolution lookup adds overhead.
* There was no easy way to get an individual Object element from an
ArrayObject like there is in DictObject. This makes it difficult to
obtain such values, as one first needs to call dict.get() to get a
Value, then cast it manually to a NonnullRefPtr<Object>.
This commit fixes these two issues by:
* Adding a new method that returns an Object for a given index.
* Adding overloads for this new method, and all the existing methods
described above, that do *not* take a Document, and therefore do
*not* perform an object resolution lookup.
2023-01-06 00:19:12 +08:00
|
|
|
|
2021-05-23 16:12:25 -07:00
|
|
|
// Instantiate the typed accessors for every object type enumerated by
// ENUMERATE_OBJECT_TYPES, then retire the helper macro. The macro to undefine
// is DEFINE_ACCESSORS; the name DEFINE_INDEXER used here previously does not
// exist in this file, so the helper stayed defined.
ENUMERATE_OBJECT_TYPES(DEFINE_ACCESSORS)
#undef DEFINE_ACCESSORS
|
|
|
|
|
|
2021-04-30 18:23:17 -07:00
|
|
|
// Appends one indentation unit to `builder` for each of the `indent` levels.
// A zero or negative `indent` appends nothing.
static void append_indent(StringBuilder& builder, int indent)
{
    int remaining_levels = indent;
    while (remaining_levels > 0) {
        builder.append(" "sv);
        --remaining_levels;
    }
}
|
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Formats the string as "(...)" when it is not binary, and as upper-case hex
// wrapped in "<...>" when it is. The indent argument is not used.
ByteString StringObject::to_byte_string(int) const
{
    if (!is_binary())
        return ByteString::formatted("({})", string());

    auto const hex_digits = encode_hex(string().bytes()).to_uppercase();
    return ByteString::formatted("<{}>", hex_digits);
}
|
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Formats the name with a leading slash. The indent argument is not used.
ByteString NameObject::to_byte_string(int) const
{
    return ByteString::formatted("/{}", name());
}
|
|
|
|
|
|
2022-11-25 16:34:06 +08:00
|
|
|
// Returns a new vector holding the to_float() conversion of every element,
// in element order.
Vector<float> ArrayObject::float_elements() const
{
    auto const element_count = m_elements.size();

    Vector<float> floats;
    floats.ensure_capacity(element_count);
    for (size_t i = 0; i < element_count; ++i)
        floats.append(m_elements[i].to_float());
    return floats;
}
|
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Formats the array as "[", one element per line, and a closing "]".
// Each element line is indented one level deeper than `indent`; the closing
// bracket is indented at `indent`. Elements themselves are formatted with
// `indent` as their own indentation argument.
ByteString ArrayObject::to_byte_string(int indent) const
{
    StringBuilder output;
    output.append("[\n"sv);

    // Newlines go between elements, so nothing is emitted before the first one.
    bool needs_separator = false;
    for (auto& item : elements()) {
        if (needs_separator)
            output.append("\n"sv);
        else
            needs_separator = true;

        append_indent(output, indent + 1);
        output.append(item.to_byte_string(indent));
    }

    output.append('\n');
    append_indent(output, indent);
    output.append(']');
    return output.to_byte_string();
}
|
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Formats the dictionary as an indented "<<", one "/key value" entry per line,
// and a closing ">>" indented at `indent`. Entries are indented one level
// deeper, and each value is formatted with that deeper level as its own
// indentation argument.
ByteString DictObject::to_byte_string(int indent) const
{
    auto const entry_indent = indent + 1;

    StringBuilder output;
    append_indent(output, indent);
    output.append("<<\n"sv);

    // Newlines go between entries, so nothing is emitted before the first one.
    bool needs_separator = false;
    for (auto& [name, entry] : map()) {
        if (needs_separator)
            output.append("\n"sv);
        else
            needs_separator = true;

        append_indent(output, entry_indent);
        output.appendff("/{} {}", name, entry.to_byte_string(entry_indent));
    }

    output.append('\n');
    append_indent(output, indent);
    output.append(">>"sv);
    return output.to_byte_string();
}
|
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Formats the stream as its dictionary, a "stream" line, the payload, and a
// final "endstream".
//
// When more than 95% of the payload bytes are below 128 (an empty payload
// counts as 100%), the payload is written byte for byte, with every byte of
// 128 or above replaced by a backslash and three octal digits. Otherwise the
// payload is written as hex, broken into rows of at most 60 digits, each row
// after the first preceded by `indent` levels of indentation.
ByteString StreamObject::to_byte_string(int indent) const
{
    constexpr size_t hex_digits_per_row = 60;
    constexpr size_t text_threshold_percent = 95;

    StringBuilder output;
    output.appendff("{}\n", dict()->to_byte_string(indent));
    output.append("stream\n"sv);

    size_t ascii_bytes = 0;
    for (auto byte : bytes())
        ascii_bytes += (byte < 128) ? 1 : 0;

    // Guard the division: with no bytes at all the percentage stays at 100.
    size_t const ascii_percent = bytes().size() ? ascii_bytes * 100 / bytes().size() : 100;

    if (ascii_percent <= text_threshold_percent) {
        auto hex = encode_hex(bytes());
        StringView remaining { hex };
        for (; remaining.length() > hex_digits_per_row; remaining = remaining.substring_view(hex_digits_per_row)) {
            output.appendff("{}\n", remaining.substring_view(0, hex_digits_per_row));
            append_indent(output, indent);
        }
        output.appendff("{}\n", remaining);
    } else {
        for (auto byte : bytes()) {
            if (byte >= 128)
                output.appendff("\\{:03o}", byte);
            else
                output.append(byte);
        }
    }

    output.append("endstream"sv);
    return output.to_byte_string();
}
|
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Formats the indirect value as an "<index> <generation> obj" header line,
// the wrapped value on the next line one indentation level deeper, and a
// closing "endobj" indented at `indent`.
ByteString IndirectValue::to_byte_string(int indent) const
{
    auto const inner_indent = indent + 1;

    StringBuilder output;
    output.appendff("{} {} obj\n", index(), generation_index());

    append_indent(output, inner_indent);
    output.append(value().to_byte_string(inner_indent));
    output.append('\n');

    append_indent(output, indent);
    output.append("endobj"sv);
    return output.to_byte_string();
}
|
|
|
|
|
|
|
|
|
|
}
|