RequestServer: Store HTTP response headers in the cache index

We currently store response headers in the cache entry file, before the
response body. When we implement cache revalidation, we will need to
update the stored response headers with whatever headers are received
in a 304 response. It's not unlikely that those headers will have a size
that differs from the stored headers. We would then have to rewrite the
entire response body after the new headers.

Rather than dealing with those inefficiencies, let's store the
response headers in the cache index. This will allow us to update the
headers with a simple SQL query.
This commit is contained in:
Timothy Flynn 2025-10-29 15:36:33 -04:00 committed by Tim Flynn
parent bf7c5cdf07
commit 20cd19be4d
Notes: github-actions[bot] 2025-11-02 18:04:37 +00:00
7 changed files with 62 additions and 72 deletions

View file

@ -4,11 +4,6 @@
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/JsonArray.h>
#include <AK/JsonArraySerializer.h>
#include <AK/JsonObject.h>
#include <AK/JsonObjectSerializer.h>
#include <AK/JsonValue.h>
#include <AK/ScopeGuard.h>
#include <LibCore/Notifier.h>
#include <LibCore/System.h>
@ -35,8 +30,6 @@ ErrorOr<CacheHeader> CacheHeader::read_from_stream(Stream& stream)
header.status_code = TRY(stream.read_value<u32>());
header.reason_phrase_size = TRY(stream.read_value<u32>());
header.reason_phrase_hash = TRY(stream.read_value<u32>());
header.headers_size = TRY(stream.read_value<u32>());
header.headers_hash = TRY(stream.read_value<u32>());
return header;
}
@ -49,8 +42,6 @@ ErrorOr<void> CacheHeader::write_to_stream(Stream& stream) const
TRY(stream.write_value(status_code));
TRY(stream.write_value(reason_phrase_size));
TRY(stream.write_value(reason_phrase_hash));
TRY(stream.write_value(headers_size));
TRY(stream.write_value(headers_hash));
return {};
}
@ -112,7 +103,7 @@ CacheEntryWriter::CacheEntryWriter(DiskCache& disk_cache, CacheIndex& index, u64
{
}
ErrorOr<void> CacheEntryWriter::write_headers(u32 status_code, Optional<String> reason_phrase, HTTP::HeaderMap const& headers)
ErrorOr<void> CacheEntryWriter::write_status_and_reason(u32 status_code, Optional<String> reason_phrase, HTTP::HeaderMap const& headers)
{
if (m_marked_for_deletion) {
close_and_destroy_cache_entry();
@ -133,35 +124,16 @@ ErrorOr<void> CacheEntryWriter::write_headers(u32 status_code, Optional<String>
if (auto freshness = calculate_freshness_lifetime(headers); freshness.is_negative() || freshness.is_zero())
return Error::from_string_literal("Response has already expired");
StringBuilder builder;
auto headers_serializer = TRY(JsonArraySerializer<>::try_create(builder));
for (auto const& header : headers.headers()) {
if (is_header_exempted_from_storage(header.name))
continue;
auto header_serializer = TRY(headers_serializer.add_object());
TRY(header_serializer.add("name"sv, header.name));
TRY(header_serializer.add("value"sv, header.value));
TRY(header_serializer.finish());
}
TRY(headers_serializer.finish());
auto serialized_headers = builder.string_view();
m_cache_header.headers_size = serialized_headers.length();
m_cache_header.headers_hash = serialized_headers.hash();
TRY(m_file->write_value(m_cache_header));
TRY(m_file->write_until_depleted(m_url));
if (reason_phrase.has_value())
TRY(m_file->write_until_depleted(*reason_phrase));
TRY(m_file->write_until_depleted(serialized_headers));
return {};
}();
if (result.is_error()) {
dbgln("\033[31;1mUnable to write headers to cache entry for\033[0m {}: {}", m_url, result.error());
dbgln("\033[31;1mUnable to write status/reason to cache entry for\033[0m {}: {}", m_url, result.error());
remove();
close_and_destroy_cache_entry();
@ -194,7 +166,7 @@ ErrorOr<void> CacheEntryWriter::write_data(ReadonlyBytes data)
return {};
}
ErrorOr<void> CacheEntryWriter::flush()
ErrorOr<void> CacheEntryWriter::flush(HTTP::HeaderMap headers)
{
ScopeGuard guard { [&]() { close_and_destroy_cache_entry(); } };
@ -208,13 +180,13 @@ ErrorOr<void> CacheEntryWriter::flush()
return result.release_error();
}
m_index.create_entry(m_cache_key, m_url, m_cache_footer.data_size, m_request_time, m_response_time);
m_index.create_entry(m_cache_key, m_url, move(headers), m_cache_footer.data_size, m_request_time, m_response_time);
dbgln("\033[34;1mFinished caching\033[0m {} ({} bytes)", m_url, m_cache_footer.data_size);
return {};
}
ErrorOr<NonnullOwnPtr<CacheEntryReader>> CacheEntryReader::create(DiskCache& disk_cache, CacheIndex& index, u64 cache_key, u64 data_size)
ErrorOr<NonnullOwnPtr<CacheEntryReader>> CacheEntryReader::create(DiskCache& disk_cache, CacheIndex& index, u64 cache_key, HTTP::HeaderMap headers, u64 data_size)
{
auto path = path_for_cache_key(disk_cache.cache_directory(), cache_key);
@ -225,7 +197,6 @@ ErrorOr<NonnullOwnPtr<CacheEntryReader>> CacheEntryReader::create(DiskCache& dis
String url;
Optional<String> reason_phrase;
HTTP::HeaderMap headers;
auto result = [&]() -> ErrorOr<void> {
cache_header = TRY(file->read_value<CacheHeader>());
@ -245,28 +216,6 @@ ErrorOr<NonnullOwnPtr<CacheEntryReader>> CacheEntryReader::create(DiskCache& dis
return Error::from_string_literal("Reason phrase hash mismatch");
}
auto serialized_headers = TRY(String::from_stream(*file, cache_header.headers_size));
if (serialized_headers.hash() != cache_header.headers_hash)
return Error::from_string_literal("HTTP headers hash mismatch");
auto json_headers = TRY(JsonValue::from_string(serialized_headers));
if (!json_headers.is_array())
return Error::from_string_literal("Expected HTTP headers to be a JSON array");
TRY(json_headers.as_array().try_for_each([&](JsonValue const& header) -> ErrorOr<void> {
if (!header.is_object())
return Error::from_string_literal("Expected headers entry to be a JSON object");
auto name = header.as_object().get_string("name"sv);
auto value = header.as_object().get_string("value"sv);
if (!name.has_value() || !value.has_value())
return Error::from_string_literal("Missing/invalid data in headers entry");
headers.set(name->to_byte_string(), value->to_byte_string());
return {};
}));
return {};
}();
@ -275,7 +224,7 @@ ErrorOr<NonnullOwnPtr<CacheEntryReader>> CacheEntryReader::create(DiskCache& dis
return result.release_error();
}
auto data_offset = sizeof(CacheHeader) + cache_header.url_size + cache_header.reason_phrase_size + cache_header.headers_size;
auto data_offset = sizeof(CacheHeader) + cache_header.url_size + cache_header.reason_phrase_size;
return adopt_own(*new CacheEntryReader { disk_cache, index, cache_key, move(url), move(path), move(file), fd, cache_header, move(reason_phrase), move(headers), data_offset, data_size });
}

View file

@ -33,9 +33,6 @@ struct [[gnu::packed]] CacheHeader {
u32 status_code { 0 };
u32 reason_phrase_size { 0 };
u32 reason_phrase_hash { 0 };
u32 headers_size { 0 };
u32 headers_hash { 0 };
};
struct [[gnu::packed]] CacheFooter {
@ -85,9 +82,9 @@ public:
static ErrorOr<NonnullOwnPtr<CacheEntryWriter>> create(DiskCache&, CacheIndex&, u64 cache_key, String url, UnixDateTime request_time);
virtual ~CacheEntryWriter() override = default;
ErrorOr<void> write_headers(u32 status_code, Optional<String> reason_phrase, HTTP::HeaderMap const&);
ErrorOr<void> write_status_and_reason(u32 status_code, Optional<String> reason_phrase, HTTP::HeaderMap const&);
ErrorOr<void> write_data(ReadonlyBytes);
ErrorOr<void> flush();
ErrorOr<void> flush(HTTP::HeaderMap);
private:
CacheEntryWriter(DiskCache&, CacheIndex&, u64 cache_key, String url, LexicalPath, NonnullOwnPtr<Core::OutputBufferedFile>, CacheHeader, UnixDateTime request_time);
@ -100,7 +97,7 @@ private:
class CacheEntryReader : public CacheEntry {
public:
static ErrorOr<NonnullOwnPtr<CacheEntryReader>> create(DiskCache&, CacheIndex&, u64 cache_key, u64 data_size);
static ErrorOr<NonnullOwnPtr<CacheEntryReader>> create(DiskCache&, CacheIndex&, u64 cache_key, HTTP::HeaderMap, u64 data_size);
virtual ~CacheEntryReader() override = default;
void pipe_to(int pipe_fd, Function<void(u64 bytes_piped)> on_complete, Function<void(u64 bytes_piped)> on_error);

View file

@ -4,13 +4,52 @@
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/StringBuilder.h>
#include <RequestServer/Cache/CacheIndex.h>
#include <RequestServer/Cache/Utilities.h>
#include <RequestServer/Cache/Version.h>
namespace RequestServer {
static constexpr u32 CACHE_METADATA_KEY = 12389u;
// Flattens a header map into the text form stored in the cache index: one
// "name:value" record per header, each terminated by a newline. Headers for
// which is_header_exempted_from_storage() returns true are left out.
// NOTE(review): nothing here escapes ':' in names or '\n' in names/values;
// presumably callers only pass well-formed HTTP header fields - confirm.
static ByteString serialize_headers(HTTP::HeaderMap const& headers)
{
    StringBuilder serialized;

    for (auto const& entry : headers.headers()) {
        if (!is_header_exempted_from_storage(entry.name)) {
            serialized.append(entry.name);
            serialized.append(':');
            serialized.append(entry.value);
            serialized.append('\n');
        }
    }

    return serialized.to_byte_string();
}
// Rebuilds a header map from the newline-separated "name:value" text produced
// by serialize_headers(). Records without a ':' are ignored, as are headers
// for which is_header_exempted_from_storage() returns true. Surrounding
// whitespace is trimmed from both the name and the value.
static HTTP::HeaderMap deserialize_headers(StringView serialized_headers)
{
    HTTP::HeaderMap result;

    auto parse_record = [&](StringView record) {
        // The name is everything before the located ':'; the value is the
        // remainder, so a value may itself contain ':' characters.
        auto separator = record.find(':');

        if (separator.has_value()) {
            auto header_name = record.substring_view(0, *separator).trim_whitespace();

            if (!is_header_exempted_from_storage(header_name)) {
                auto header_value = record.substring_view(*separator + 1).trim_whitespace();
                result.set(header_name, header_value);
            }
        }
    };

    serialized_headers.for_each_split_view('\n', SplitBehavior::Nothing, parse_record);
    return result;
}
ErrorOr<CacheIndex> CacheIndex::create(Database::Database& database)
{
auto create_cache_metadata_table = TRY(database.prepare_statement(R"#(
@ -45,6 +84,7 @@ ErrorOr<CacheIndex> CacheIndex::create(Database::Database& database)
CREATE TABLE IF NOT EXISTS CacheIndex (
cache_key INTEGER,
url TEXT,
response_headers TEXT,
data_size INTEGER,
request_time INTEGER,
response_time INTEGER,
@ -55,7 +95,7 @@ ErrorOr<CacheIndex> CacheIndex::create(Database::Database& database)
database.execute_statement(create_cache_index_table, {});
Statements statements {};
statements.insert_entry = TRY(database.prepare_statement("INSERT OR REPLACE INTO CacheIndex VALUES (?, ?, ?, ?, ?, ?);"sv));
statements.insert_entry = TRY(database.prepare_statement("INSERT OR REPLACE INTO CacheIndex VALUES (?, ?, ?, ?, ?, ?, ?);"sv));
statements.remove_entry = TRY(database.prepare_statement("DELETE FROM CacheIndex WHERE cache_key = ?;"sv));
statements.remove_all_entries = TRY(database.prepare_statement("DELETE FROM CacheIndex;"sv));
statements.select_entry = TRY(database.prepare_statement("SELECT * FROM CacheIndex WHERE cache_key = ?;"sv));
@ -70,20 +110,21 @@ CacheIndex::CacheIndex(Database::Database& database, Statements statements)
{
}
void CacheIndex::create_entry(u64 cache_key, String url, u64 data_size, UnixDateTime request_time, UnixDateTime response_time)
void CacheIndex::create_entry(u64 cache_key, String url, HTTP::HeaderMap response_headers, u64 data_size, UnixDateTime request_time, UnixDateTime response_time)
{
auto now = UnixDateTime::now();
Entry entry {
.cache_key = cache_key,
.url = move(url),
.response_headers = move(response_headers),
.data_size = data_size,
.request_time = request_time,
.response_time = response_time,
.last_access_time = now,
};
m_database.execute_statement(m_statements.insert_entry, {}, entry.cache_key, entry.url, entry.data_size, entry.request_time, entry.response_time, entry.last_access_time);
m_database.execute_statement(m_statements.insert_entry, {}, entry.cache_key, entry.url, serialize_headers(entry.response_headers), entry.data_size, entry.request_time, entry.response_time, entry.last_access_time);
m_entries.set(cache_key, move(entry));
}
@ -122,12 +163,13 @@ Optional<CacheIndex::Entry&> CacheIndex::find_entry(u64 cache_key)
auto cache_key = m_database.result_column<u64>(statement_id, column++);
auto url = m_database.result_column<String>(statement_id, column++);
auto response_headers = m_database.result_column<ByteString>(statement_id, column++);
auto data_size = m_database.result_column<u64>(statement_id, column++);
auto request_time = m_database.result_column<UnixDateTime>(statement_id, column++);
auto response_time = m_database.result_column<UnixDateTime>(statement_id, column++);
auto last_access_time = m_database.result_column<UnixDateTime>(statement_id, column++);
Entry entry { cache_key, move(url), data_size, request_time, response_time, last_access_time };
Entry entry { cache_key, move(url), deserialize_headers(response_headers), data_size, request_time, response_time, last_access_time };
m_entries.set(cache_key, move(entry));
},
cache_key);

View file

@ -11,6 +11,7 @@
#include <AK/Time.h>
#include <AK/Types.h>
#include <LibDatabase/Database.h>
#include <LibHTTP/HeaderMap.h>
namespace RequestServer {
@ -21,6 +22,7 @@ class CacheIndex {
u64 cache_key { 0 };
String url;
HTTP::HeaderMap response_headers;
u64 data_size { 0 };
UnixDateTime request_time;
@ -31,7 +33,7 @@ class CacheIndex {
public:
static ErrorOr<CacheIndex> create(Database::Database&);
void create_entry(u64 cache_key, String url, u64 data_size, UnixDateTime request_time, UnixDateTime response_time);
void create_entry(u64 cache_key, String url, HTTP::HeaderMap, u64 data_size, UnixDateTime request_time, UnixDateTime response_time);
void remove_entry(u64 cache_key);
void remove_all_entries();

View file

@ -75,7 +75,7 @@ Variant<Optional<CacheEntryReader&>, DiskCache::CacheHasOpenEntry> DiskCache::op
return Optional<CacheEntryReader&> {};
}
auto cache_entry = CacheEntryReader::create(*this, m_index, cache_key, index_entry->data_size);
auto cache_entry = CacheEntryReader::create(*this, m_index, cache_key, index_entry->response_headers, index_entry->data_size);
if (cache_entry.is_error()) {
dbgln("\033[31;1mUnable to open cache entry for\033[0m {}: {}", request.url(), cache_entry.error());
m_index.remove_entry(cache_key);

View file

@ -11,6 +11,6 @@
namespace RequestServer {
// Increment this version when a breaking change is made to the cache index or cache entry formats.
static constexpr inline u32 CACHE_VERSION = 1u;
static constexpr inline u32 CACHE_VERSION = 2u;
}

View file

@ -117,7 +117,7 @@ Request::~Request()
curl_slist_free_all(string_list);
if (m_cache_entry_writer.has_value())
(void)m_cache_entry_writer->flush();
(void)m_cache_entry_writer->flush(move(m_response_headers));
}
void Request::notify_request_unblocked(Badge<DiskCache>)
@ -520,7 +520,7 @@ void Request::transfer_headers_to_client_if_needed()
m_client.async_headers_became_available(m_request_id, m_response_headers, m_status_code, m_reason_phrase);
if (m_cache_entry_writer.has_value()) {
if (m_cache_entry_writer->write_headers(m_status_code, m_reason_phrase, m_response_headers).is_error())
if (m_cache_entry_writer->write_status_and_reason(m_status_code, m_reason_phrase, m_response_headers).is_error())
m_cache_entry_writer.clear();
}
}