2021-04-30 18:33:13 -07:00
|
|
|
/*
|
|
|
|
|
* Copyright (c) 2021, Matthew Olsson <mattco@serenityos.org>
|
|
|
|
|
*
|
|
|
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
#include <AK/ByteString.h>
|
2021-04-30 18:33:13 -07:00
|
|
|
#include <AK/Debug.h>
|
|
|
|
|
#include <AK/Function.h>
|
|
|
|
|
#include <AK/ScopeGuard.h>
|
2021-09-16 21:28:39 +02:00
|
|
|
#include <AK/Span.h>
|
2021-11-11 13:43:44 +03:30
|
|
|
#include <AK/Vector.h>
|
2023-01-15 12:31:58 +08:00
|
|
|
#include <LibPDF/Error.h>
|
2021-04-30 18:33:13 -07:00
|
|
|
|
|
|
|
|
namespace PDF {
|
|
|
|
|
|
|
|
|
|
class Reader {
|
|
|
|
|
public:
|
2021-11-11 01:06:34 +01:00
|
|
|
explicit Reader(ReadonlyBytes bytes)
|
2021-04-30 18:33:13 -07:00
|
|
|
: m_bytes(bytes)
|
|
|
|
|
{
|
|
|
|
|
}
|
|
|
|
|
|
2021-11-11 01:06:34 +01:00
|
|
|
ALWAYS_INLINE ReadonlyBytes bytes() const { return m_bytes; }
|
2021-04-30 18:33:13 -07:00
|
|
|
ALWAYS_INLINE size_t offset() const { return m_offset; }
|
|
|
|
|
|
|
|
|
|
bool done() const
|
|
|
|
|
{
|
|
|
|
|
if (m_forwards)
|
|
|
|
|
return offset() >= bytes().size();
|
|
|
|
|
return m_offset < 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
size_t remaining() const
|
|
|
|
|
{
|
|
|
|
|
if (done())
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
|
|
if (m_forwards)
|
2022-11-10 23:09:40 +01:00
|
|
|
return bytes().size() - offset();
|
2021-04-30 18:33:13 -07:00
|
|
|
return offset() + 1;
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-23 00:28:30 -04:00
|
|
|
void move_by(ssize_t count)
|
2021-04-30 18:33:13 -07:00
|
|
|
{
|
|
|
|
|
if (m_forwards) {
|
2023-10-23 00:28:30 -04:00
|
|
|
m_offset += count;
|
2021-04-30 18:33:13 -07:00
|
|
|
} else {
|
2023-10-23 00:28:30 -04:00
|
|
|
m_offset -= count;
|
2021-04-30 18:33:13 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2021-05-26 22:52:05 -07:00
|
|
|
template<typename T = char>
|
|
|
|
|
T read()
|
2021-04-30 18:33:13 -07:00
|
|
|
{
|
2022-10-17 00:06:11 +02:00
|
|
|
T value = reinterpret_cast<T const*>(m_bytes.offset(m_offset))[0];
|
2021-05-26 22:52:05 -07:00
|
|
|
move_by(sizeof(T));
|
|
|
|
|
return value;
|
2021-04-30 18:33:13 -07:00
|
|
|
}
|
|
|
|
|
|
2023-01-15 12:31:58 +08:00
|
|
|
template<typename T = char>
|
|
|
|
|
PDFErrorOr<T> try_read()
|
|
|
|
|
{
|
2023-07-12 14:04:26 -04:00
|
|
|
if (sizeof(T) + m_offset > m_bytes.size()) {
|
2023-12-16 17:49:34 +03:30
|
|
|
auto message = ByteString::formatted("Cannot read {} bytes at offset {} of ReadonlyBytes of size {}", sizeof(T), m_offset, m_bytes.size());
|
2023-01-15 12:31:58 +08:00
|
|
|
return Error { Error::Type::Parse, message };
|
|
|
|
|
}
|
|
|
|
|
return read<T>();
|
|
|
|
|
}
|
|
|
|
|
|
2021-04-30 18:33:13 -07:00
|
|
|
char peek(size_t shift = 0) const
|
|
|
|
|
{
|
|
|
|
|
auto offset = m_offset + shift * (m_forwards ? 1 : -1);
|
|
|
|
|
return static_cast<char>(m_bytes.at(offset));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template<typename... T>
|
|
|
|
|
bool matches_any(T... elements) const
|
|
|
|
|
{
|
|
|
|
|
if (done())
|
|
|
|
|
return false;
|
|
|
|
|
auto ch = peek();
|
|
|
|
|
return ((ch == elements) || ...);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool matches(char ch) const
|
|
|
|
|
{
|
|
|
|
|
return !done() && peek() == ch;
|
|
|
|
|
}
|
|
|
|
|
|
2022-04-01 20:58:27 +03:00
|
|
|
bool matches(char const* chars) const
|
2021-04-30 18:33:13 -07:00
|
|
|
{
|
2023-12-16 17:49:34 +03:30
|
|
|
ByteString string(chars);
|
2021-04-30 18:33:13 -07:00
|
|
|
if (remaining() < string.length())
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
if (!m_forwards)
|
|
|
|
|
string = string.reverse();
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < string.length(); i++) {
|
|
|
|
|
if (peek(i) != string[i])
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template<typename T = char>
|
|
|
|
|
void move_to(size_t offset)
|
|
|
|
|
{
|
2023-10-17 08:43:21 -04:00
|
|
|
VERIFY(offset <= m_bytes.size());
|
2021-04-30 18:33:13 -07:00
|
|
|
m_offset = static_cast<ssize_t>(offset);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void move_until(char ch)
|
|
|
|
|
{
|
|
|
|
|
while (!done() && peek() != ch)
|
|
|
|
|
move_by(1);
|
|
|
|
|
}
|
|
|
|
|
|
2023-11-04 21:02:28 +01:00
|
|
|
void move_until(AK::Function<bool(char)> predicate)
|
2021-04-30 18:33:13 -07:00
|
|
|
{
|
|
|
|
|
while (!done() && !predicate(peek()))
|
|
|
|
|
move_by(1);
|
|
|
|
|
}
|
|
|
|
|
|
2023-11-04 21:02:28 +01:00
|
|
|
ALWAYS_INLINE void move_while(AK::Function<bool(char)> predicate)
|
2021-04-30 18:33:13 -07:00
|
|
|
{
|
|
|
|
|
move_until([&predicate](char t) { return !predicate(t); });
|
|
|
|
|
}
|
|
|
|
|
|
2023-11-13 22:46:42 -05:00
|
|
|
static bool is_eol(char);
|
|
|
|
|
static bool is_whitespace(char);
|
2024-01-03 17:28:46 -05:00
|
|
|
static bool is_non_eol_whitespace(char);
|
2023-11-13 22:46:42 -05:00
|
|
|
|
2022-08-15 11:02:38 +02:00
|
|
|
bool matches_eol() const;
|
|
|
|
|
bool matches_whitespace() const;
|
2024-01-03 17:28:46 -05:00
|
|
|
bool matches_non_eol_whitespace() const;
|
2022-08-15 11:02:38 +02:00
|
|
|
bool matches_number() const;
|
|
|
|
|
bool matches_delimiter() const;
|
|
|
|
|
bool matches_regular_character() const;
|
|
|
|
|
|
|
|
|
|
bool consume_eol();
|
|
|
|
|
bool consume_whitespace();
|
2024-01-03 17:28:46 -05:00
|
|
|
bool consume_non_eol_whitespace();
|
2022-08-15 11:02:38 +02:00
|
|
|
char consume();
|
|
|
|
|
void consume(int amount);
|
|
|
|
|
bool consume(char);
|
|
|
|
|
|
2021-04-30 18:33:13 -07:00
|
|
|
ALWAYS_INLINE void set_reading_forwards() { m_forwards = true; }
|
|
|
|
|
ALWAYS_INLINE void set_reading_backwards() { m_forwards = false; }
|
|
|
|
|
|
|
|
|
|
ALWAYS_INLINE void save() { m_saved_offsets.append(m_offset); }
|
|
|
|
|
ALWAYS_INLINE void load() { m_offset = m_saved_offsets.take_last(); }
|
|
|
|
|
ALWAYS_INLINE void discard() { m_saved_offsets.take_last(); }
|
|
|
|
|
|
2021-05-08 14:57:49 -07:00
|
|
|
#ifdef PDF_DEBUG
|
|
|
|
|
void dump_state() const
|
2021-04-30 18:33:13 -07:00
|
|
|
{
|
2021-05-22 15:25:34 -07:00
|
|
|
dbgln("Reader State (offset={} size={})", offset(), bytes().size());
|
2021-04-30 18:33:13 -07:00
|
|
|
|
2021-05-22 15:25:34 -07:00
|
|
|
size_t from = max(0, static_cast<int>(offset()) - 10);
|
2021-04-30 18:33:13 -07:00
|
|
|
size_t to = min(bytes().size() - 1, offset() + 10);
|
|
|
|
|
|
|
|
|
|
for (auto i = from; i <= to; i++) {
|
|
|
|
|
char value = static_cast<char>(bytes().at(i));
|
2023-12-16 17:49:34 +03:30
|
|
|
auto line = ByteString::formatted(" {}: '{}' (value={:3d}) ", i, value, static_cast<u8>(value));
|
2021-05-22 15:25:34 -07:00
|
|
|
if (i == offset()) {
|
|
|
|
|
dbgln("{} <<< current location, forwards={}", line, m_forwards);
|
|
|
|
|
} else {
|
|
|
|
|
dbgln("{}", line);
|
|
|
|
|
}
|
2021-04-30 18:33:13 -07:00
|
|
|
}
|
2021-05-22 15:25:34 -07:00
|
|
|
dbgln();
|
2021-04-30 18:33:13 -07:00
|
|
|
}
|
2021-05-08 14:57:49 -07:00
|
|
|
#endif
|
2021-04-30 18:33:13 -07:00
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
ReadonlyBytes m_bytes;
|
|
|
|
|
ssize_t m_offset { 0 };
|
|
|
|
|
Vector<ssize_t> m_saved_offsets;
|
|
|
|
|
bool m_forwards { true };
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
}
|