LibRequests+RequestServer: Begin implementing an HTTP disk cache

This adds a disk cache for HTTP responses received from the network. For
now, we take a rather conservative approach to caching. We don't cache a
response until we're 100% sure it is cacheable (there are heuristics we
can implement in the future based on the absence of specific headers).

The cache is broken into 2 categories of files:

1. An index file. This is a SQL database containing metadata about each
   cache entry (URL, timestamps, etc.).
2. Cache files. Each cached response is in its own file. The file is an
   amalgamation of all info needed to reconstruct an HTTP response. This
   includes the status code, headers, body, etc.

A cache entry is created once we receive the headers for a response. The
index, however, is not updated at this point. We stream the body into
the cache entry as it is received. Once we've successfully cached the
entire body, we create an index entry in the database. If any of these
steps fail along the way, the cache entry is removed and the index is
left untouched.

Subsequent requests are checked for cache hits from the index. If a hit
is found, we read just enough of the cache entry to inform WebContent of
the status code and headers. The body of the response is piped to WC via
syscalls, such that the transfer happens entirely in the kernel; no need
to allocate the memory for the body in userspace (WC still allocates a
buffer to hold the data, of course). If an error occurs while piping the
body, we currently error out the request. There is a FIXME to switch to
a network request.

Cache hits are also validated for freshness before they are used. If a
response has expired, we remove it and its index entry, and proceed with
a network request.
This commit is contained in:
Timothy Flynn 2025-10-07 19:59:21 -04:00 committed by Andreas Kling
parent 411aed96ab
commit 3516a2344f
Notes: github-actions[bot] 2025-10-14 11:41:51 +00:00
13 changed files with 1114 additions and 7 deletions

View file

@ -0,0 +1,346 @@
/*
* Copyright (c) 2025, Tim Flynn <trflynn89@ladybird.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/JsonArray.h>
#include <AK/JsonArraySerializer.h>
#include <AK/JsonObject.h>
#include <AK/JsonObjectSerializer.h>
#include <AK/JsonValue.h>
#include <AK/ScopeGuard.h>
#include <LibCore/Notifier.h>
#include <LibCore/System.h>
#include <LibFileSystem/FileSystem.h>
#include <RequestServer/Cache/CacheEntry.h>
#include <RequestServer/Cache/CacheIndex.h>
#include <RequestServer/Cache/DiskCache.h>
#include <RequestServer/Cache/Utilities.h>
namespace RequestServer {
// Returns the on-disk location of the cache entry for `cache_key`: the cache directory joined with the key
// rendered as a zero-padded, 16-digit lowercase hexadecimal file name.
static LexicalPath path_for_cache_key(LexicalPath const& cache_directory, u64 cache_key)
{
    auto const file_name = MUST(String::formatted("{:016x}", cache_key));
    return cache_directory.append(file_name);
}
// Deserializes a CacheHeader from `stream`. Each field is read as a u32, in the same order in which
// CacheHeader::write_to_stream() emits them. The first failing read is propagated to the caller.
ErrorOr<CacheHeader> CacheHeader::read_from_stream(Stream& stream)
{
    auto next_u32 = [&stream] { return stream.read_value<u32>(); };

    CacheHeader parsed;

    parsed.magic = TRY(next_u32());
    parsed.version = TRY(next_u32());

    parsed.url_size = TRY(next_u32());
    parsed.url_hash = TRY(next_u32());

    parsed.status_code = TRY(next_u32());

    parsed.reason_phrase_size = TRY(next_u32());
    parsed.reason_phrase_hash = TRY(next_u32());

    parsed.headers_size = TRY(next_u32());
    parsed.headers_hash = TRY(next_u32());

    return parsed;
}
// Serializes this header to `stream`, one field at a time. The field order here defines the on-disk layout
// and must stay in sync with CacheHeader::read_from_stream(). The first failing write is propagated.
ErrorOr<void> CacheHeader::write_to_stream(Stream& stream) const
{
    auto emit = [&stream](auto const& field) { return stream.write_value(field); };

    TRY(emit(magic));
    TRY(emit(version));

    TRY(emit(url_size));
    TRY(emit(url_hash));

    TRY(emit(status_code));

    TRY(emit(reason_phrase_size));
    TRY(emit(reason_phrase_hash));

    TRY(emit(headers_size));
    TRY(emit(headers_hash));

    return {};
}
// Serializes this footer to `stream`: the body size first, followed by the CRC. The order must match
// CacheFooter::read_from_stream().
ErrorOr<void> CacheFooter::write_to_stream(Stream& stream) const
{
    if (auto size_result = stream.write_value(data_size); size_result.is_error())
        return size_result.release_error();

    return stream.write_value(crc32);
}
// Deserializes a CacheFooter from `stream`: a u64 body size followed by a u32 CRC. The first failing read
// is propagated to the caller.
ErrorOr<CacheFooter> CacheFooter::read_from_stream(Stream& stream)
{
    auto const stored_size = TRY(stream.read_value<u64>());
    auto const stored_checksum = TRY(stream.read_value<u32>());

    CacheFooter parsed;
    parsed.data_size = stored_size;
    parsed.crc32 = stored_checksum;
    return parsed;
}
// Common state shared by cache entry readers and writers.
//
// `disk_cache` and `index` are stored as given (they are passed by reference, so they are expected to
// outlive this entry). `cache_key` identifies the entry, `url` is the URL the entry was created for, `path`
// is the location of the entry's file on disk, and `cache_header` is the header written to / read from the
// start of that file.
CacheEntry::CacheEntry(DiskCache& disk_cache, CacheIndex& index, u64 cache_key, String url, LexicalPath path, CacheHeader cache_header)
: m_disk_cache(disk_cache)
, m_index(index)
, m_cache_key(cache_key)
, m_url(move(url))
, m_path(move(path))
, m_cache_header(cache_header)
{
}
// Deletes this entry's file from disk and drops the matching entry from the index. Removing the file is
// best-effort: a failure there is deliberately ignored and the index entry is removed regardless.
void CacheEntry::remove()
{
    auto const& entry_path = m_path.string();
    (void)FileSystem::remove(entry_path, FileSystem::RecursionMode::Disallowed);

    m_index.remove_entry(m_cache_key);
}
// Notifies the owning DiskCache that this entry is finished.
//
// NOTE(review): as the name suggests, the disk cache presumably destroys this object in response, so callers
// should not touch any member after this returns - confirm against DiskCache::cache_entry_closed().
void CacheEntry::close_and_destory_cache_entry()
{
m_disk_cache.cache_entry_closed({}, *this);
}
// Creates the on-disk file for a new cache entry and writes everything that is known once the response
// headers have arrived: the CacheHeader, the URL, the optional reason phrase, and the response headers
// serialized as a JSON array of { "name", "value" } objects. Headers for which
// is_header_exempted_from_storage() returns true are not stored.
//
// On success, returns a writer that the response body can be streamed into. On any failure after the file
// has been opened, the (partially written) file is removed again and the error is returned.
ErrorOr<NonnullOwnPtr<CacheEntryWriter>> CacheEntryWriter::create(DiskCache& disk_cache, CacheIndex& index, u64 cache_key, String url, u32 status_code, Optional<String> reason_phrase, HTTP::HeaderMap const& headers, UnixDateTime request_time)
{
    auto path = path_for_cache_key(disk_cache.cache_directory(), cache_key);

    auto unbuffered_file = TRY(Core::File::open(path.string(), Core::File::OpenMode::Write));

    // From this point on the file exists on disk. Make sure that every early return below - including a
    // failure to create the buffered wrapper - cleans it up, so we never leave orphaned entries behind.
    ArmedScopeGuard remove_file_on_failure { [&]() {
        (void)FileSystem::remove(path.string(), FileSystem::RecursionMode::Disallowed);
    } };

    auto file = TRY(Core::OutputBufferedFile::create(move(unbuffered_file)));

    // Serialize the storable response headers as JSON.
    StringBuilder builder;
    auto headers_serializer = TRY(JsonArraySerializer<>::try_create(builder));

    for (auto const& header : headers.headers()) {
        if (is_header_exempted_from_storage(header.name))
            continue;

        auto header_serializer = TRY(headers_serializer.add_object());
        TRY(header_serializer.add("name"sv, header.name));
        TRY(header_serializer.add("value"sv, header.value));
        TRY(header_serializer.finish());
    }

    TRY(headers_serializer.finish());
    auto serialized_headers = builder.string_view();

    // The header records the size and hash of each variable-length section that follows it, so that the
    // reader can locate and validate them.
    CacheHeader cache_header;
    cache_header.url_size = url.byte_count();
    cache_header.url_hash = url.hash();

    cache_header.status_code = status_code;
    cache_header.reason_phrase_size = reason_phrase.has_value() ? reason_phrase->byte_count() : 0;
    cache_header.reason_phrase_hash = reason_phrase.has_value() ? reason_phrase->hash() : 0;

    cache_header.headers_size = serialized_headers.length();
    cache_header.headers_hash = serialized_headers.hash();

    TRY(file->write_value(cache_header));
    TRY(file->write_until_depleted(url));
    if (reason_phrase.has_value())
        TRY(file->write_until_depleted(*reason_phrase));
    TRY(file->write_until_depleted(serialized_headers));

    // Everything succeeded; the file now belongs to the writer. Disarm before `path` is moved from.
    remove_file_on_failure.disarm();

    return adopt_own(*new CacheEntryWriter { disk_cache, index, cache_key, move(url), move(path), move(file), cache_header, request_time });
}
// Constructs a writer around an already-opened cache file whose header section has been written by
// CacheEntryWriter::create(). `request_time` is supplied by the caller; the response time is taken to be the
// moment this writer is constructed.
CacheEntryWriter::CacheEntryWriter(DiskCache& disk_cache, CacheIndex& index, u64 cache_key, String url, LexicalPath path, NonnullOwnPtr<Core::OutputBufferedFile> file, CacheHeader cache_header, UnixDateTime request_time)
: CacheEntry(disk_cache, index, cache_key, move(url), move(path), cache_header)
, m_file(move(file))
, m_request_time(request_time)
, m_response_time(UnixDateTime::now())
{
}
// Appends a chunk of the response body to the cache file and accounts for it in the footer's data size.
//
// If the write fails, the cache entry is removed (file and index entry), the disk cache is told that this
// entry is closed, and the write error is returned.
ErrorOr<void> CacheEntryWriter::write_data(ReadonlyBytes data)
{
    if (auto result = m_file->write_until_depleted(data); result.is_error()) {
        dbgln("\033[31;1mUnable to write to cache entry for\033[0m {}: {}", m_url, result.error());

        remove();

        // NOTE(review): this presumably destroys `this`; only the local `result` is touched afterwards.
        close_and_destory_cache_entry();

        return result.release_error();
    }

    m_cache_footer.data_size += data.size();

    // FIXME: Update the crc.

    dbgln("\033[36;1mSaved {} bytes for\033[0m {}", data.size(), m_url);
    return {};
}
// Finishes the cache entry once the whole body has been written: appends the footer to the cache file and,
// if that succeeds, creates the index entry. If writing the footer fails, the cache entry is removed and the
// error is returned.
//
// In both cases the disk cache is notified that this entry is closed when the function returns.
ErrorOr<void> CacheEntryWriter::flush()
{
    // Runs on every exit path, after the return value has been produced.
    ScopeGuard guard { [&]() { close_and_destory_cache_entry(); } };

    if (auto result = m_file->write_value(m_cache_footer); result.is_error()) {
        dbgln("\033[31;1mUnable to flush cache entry for\033[0m {}: {}", m_url, result.error());
        remove();

        return result.release_error();
    }

    // NOTE(review): m_file is a buffered file and is not explicitly flushed here, so the footer may still
    // be sitting in the buffer when the index entry is created - confirm that this is acceptable.
    m_index.create_entry(m_cache_key, m_url, m_cache_footer.data_size, m_request_time, m_response_time);

    dbgln("\033[34;1mFinished caching\033[0m {} ({} bytes)", m_url, m_cache_footer.data_size);
    return {};
}
// Opens the cache file for `cache_key` and parses everything that precedes the response body: the
// CacheHeader, the URL, the optional reason phrase, and the JSON-serialized response headers. The magic
// value, the version, and the stored hash of each section are validated along the way.
//
// `data_size` is the size of the response body as supplied by the caller. On success, returns a reader
// positioned to pipe the body; if parsing or validation fails, the cache file is removed and the error is
// returned.
ErrorOr<NonnullOwnPtr<CacheEntryReader>> CacheEntryReader::create(DiskCache& disk_cache, CacheIndex& index, u64 cache_key, u64 data_size)
{
    auto path = path_for_cache_key(disk_cache.cache_directory(), cache_key);
    auto file = TRY(Core::File::open(path.string(), Core::File::OpenMode::Read));

    // Remember the descriptor now; `file` itself is moved into the reader at the end of this function.
    auto fd = file->fd();

    CacheHeader cache_header;
    String url;
    Optional<String> reason_phrase;
    HTTP::HeaderMap headers;

    auto read_metadata = [&]() -> ErrorOr<void> {
        cache_header = TRY(file->read_value<CacheHeader>());

        if (cache_header.magic != CacheHeader::CACHE_MAGIC)
            return Error::from_string_literal("Magic value mismatch");
        if (cache_header.version != CacheHeader::CACHE_VERSION)
            return Error::from_string_literal("Version mismatch");

        url = TRY(String::from_stream(*file, cache_header.url_size));
        if (url.hash() != cache_header.url_hash)
            return Error::from_string_literal("URL hash mismatch");

        // A size of zero means that no reason phrase was stored.
        if (cache_header.reason_phrase_size != 0) {
            auto stored_phrase = TRY(String::from_stream(*file, cache_header.reason_phrase_size));
            if (stored_phrase.hash() != cache_header.reason_phrase_hash)
                return Error::from_string_literal("Reason phrase hash mismatch");

            reason_phrase = move(stored_phrase);
        }

        auto serialized_headers = TRY(String::from_stream(*file, cache_header.headers_size));
        if (serialized_headers.hash() != cache_header.headers_hash)
            return Error::from_string_literal("HTTP headers hash mismatch");

        auto parsed_headers = TRY(JsonValue::from_string(serialized_headers));
        if (!parsed_headers.is_array())
            return Error::from_string_literal("Expected HTTP headers to be a JSON array");

        TRY(parsed_headers.as_array().try_for_each([&](JsonValue const& entry) -> ErrorOr<void> {
            if (!entry.is_object())
                return Error::from_string_literal("Expected headers entry to be a JSON object");

            auto const& object = entry.as_object();
            auto name = object.get_string("name"sv);
            auto value = object.get_string("value"sv);

            if (!(name.has_value() && value.has_value()))
                return Error::from_string_literal("Missing/invalid data in headers entry");

            headers.set(name->to_byte_string(), value->to_byte_string());
            return {};
        }));

        return {};
    };

    if (auto result = read_metadata(); result.is_error()) {
        (void)FileSystem::remove(path.string(), FileSystem::RecursionMode::Disallowed);
        return result.release_error();
    }

    // The body begins right after the fixed-size header and the three variable-length sections.
    auto const metadata_size = cache_header.url_size + cache_header.reason_phrase_size + cache_header.headers_size;
    auto data_offset = sizeof(CacheHeader) + metadata_size;

    return adopt_own(*new CacheEntryReader { disk_cache, index, cache_key, move(url), move(path), move(file), fd, cache_header, move(reason_phrase), move(headers), data_offset, data_size });
}
// Constructs a reader from the metadata parsed by CacheEntryReader::create().
//
// `fd` is the descriptor of `file`, obtained by create() before `file` is moved in here. `data_offset` is
// the position in the file at which the response body begins, and `data_size` is the length of that body.
CacheEntryReader::CacheEntryReader(DiskCache& disk_cache, CacheIndex& index, u64 cache_key, String url, LexicalPath path, NonnullOwnPtr<Core::File> file, int fd, CacheHeader cache_header, Optional<String> reason_phrase, HTTP::HeaderMap header_map, u64 data_offset, u64 data_size)
: CacheEntry(disk_cache, index, cache_key, move(url), move(path), cache_header)
, m_file(move(file))
, m_fd(fd)
, m_reason_phrase(move(reason_phrase))
, m_headers(move(header_map))
, m_data_offset(data_offset)
, m_data_size(data_size)
{
}
// Starts transferring the cached response body into `pipe_fd`. May only be called once per reader.
//
// `on_complete` and `on_error` are stored and later invoked with the number of bytes piped so far. A write
// notifier on the pipe is created in the disabled state; when it fires, it disables itself again and
// resumes the transfer. The first transfer attempt is made immediately, before this function returns.
void CacheEntryReader::pipe_to(int pipe_fd, Function<void(u64)> on_complete, Function<void(u64)> on_error)
{
    VERIFY(m_pipe_fd == -1);

    m_on_pipe_error = move(on_error);
    m_on_pipe_complete = move(on_complete);
    m_pipe_fd = pipe_fd;

    m_pipe_write_notifier = Core::Notifier::construct(m_pipe_fd, Core::NotificationType::Write);
    m_pipe_write_notifier->on_activation = [this] {
        m_pipe_write_notifier->set_enabled(false);
        pipe_without_blocking();
    };
    m_pipe_write_notifier->set_enabled(false);

    pipe_without_blocking();
}
void CacheEntryReader::pipe_without_blocking()
{
auto result = Core::System::transfer_file_through_pipe(m_fd, m_pipe_fd, m_data_offset + m_bytes_piped, m_data_size - m_bytes_piped);
if (result.is_error()) {
if (result.error().code() != EAGAIN && result.error().code() != EWOULDBLOCK) {
dbgln("\033[31;1mError transferring cache to pipe for\033[0m {}: {}", m_url, result.error());
if (m_on_pipe_error)
m_on_pipe_error(m_bytes_piped);
close_and_destory_cache_entry();
} else {
m_pipe_write_notifier->set_enabled(true);
}
return;
}
m_bytes_piped += result.value();
if (m_bytes_piped == m_data_size) {
pipe_complete();
return;
}
pipe_without_blocking();
}
void CacheEntryReader::pipe_complete()
{
if (auto result = read_and_validate_footer(); result.is_error()) {
dbgln("\033[31;1mError validating cache entry for\033[0m {}: {}", m_url, result.error());
remove();
if (m_on_pipe_error)
m_on_pipe_error(m_bytes_piped);
} else {
m_index.update_last_access_time(m_cache_key);
if (m_on_pipe_complete)
m_on_pipe_complete(m_bytes_piped);
}
close_and_destory_cache_entry();
}
// Seeks to the end of the response body, reads the footer stored there into m_cache_footer, and checks that
// the body size it records matches the size this reader was created with. Seek and read failures are
// propagated; a size mismatch is reported as an error.
ErrorOr<void> CacheEntryReader::read_and_validate_footer()
{
    auto const footer_offset = m_data_offset + m_data_size;
    TRY(m_file->seek(footer_offset, SeekMode::SetPosition));

    m_cache_footer = TRY(m_file->read_value<CacheFooter>());

    auto const size_matches = m_cache_footer.data_size == m_data_size;
    if (!size_matches)
        return Error::from_string_literal("Invalid data size in footer");

    // FIXME: Validate the crc.

    return {};
}
}