ladybird/Libraries/LibHTTP/Cache/CacheIndex.cpp
Aliaksandr Kalenik 14dc9e8ca2 LibHTTP: Short-circuit CacheIndex eviction when under its size limit
remove_entries_exceeding_cache_limit() is called after every network
response, but the cache is usually still under budget and there is
nothing to evict. Every one of those calls currently still runs the
window-function eviction SQL over the whole CacheIndex table just to
conclude there is nothing to do.

Short-circuit the call when the cache is already within its configured
size limit. To make that check cheap, maintain m_total_estimated_size
as a running total of the cache's estimated byte size, so the no-op
case becomes a single u64 compare and the DB is only touched when
there is real work.

Bookkeeping:
- Seed the total in CacheIndex::create() via a new
  select_total_estimated_size statement (COALESCE(..., 0) so an empty
  index returns 0 rather than NULL).
- Each Entry caches serialized_request_headers_size and
  serialized_response_headers_size so we don't re-serialize to
  recompute its footprint; Entry::estimated_size() centralizes the
  arithmetic.
- create_entry() adds the new entry's size. Any row it displaces is
  removed via DELETE ... RETURNING so the total stays accurate even
  for entries that were never loaded into m_entries.
- remove_entry() and the bulk DELETE statements were extended with
  the same RETURNING clause for the same reason.
- update_response_headers() shifts the total by the signed delta
  between old and new serialized header size.

Also COALESCEs estimate_cache_size_accessed_since over an empty table
to 0 so callers don't have to special-case NULL.
2026-04-19 01:31:37 +02:00

387 lines
15 KiB
C++

/*
* Copyright (c) 2025-2026, Tim Flynn <trflynn89@ladybird.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Debug.h>
#include <AK/StringBuilder.h>
#include <LibFileSystem/FileSystem.h>
#include <LibHTTP/Cache/CacheIndex.h>
#include <LibHTTP/Cache/Utilities.h>
#include <LibHTTP/Cache/Version.h>
namespace HTTP {
// Arbitrary fixed primary-key value for the single row kept in the
// CacheMetadata table; that row only ever stores the cache schema version.
static constexpr u32 CACHE_METADATA_KEY = 12389u;
// Flattens a header list into the newline-delimited "name:value" form stored
// in the request_headers/response_headers BLOB columns of CacheIndex.
static ByteString serialize_headers(HeaderList const& headers)
{
    StringBuilder serialized;
    for (auto const& header : headers)
        serialized.appendff("{}:{}\n", header.name, header.value);
    return serialized.to_byte_string();
}
// Reverses serialize_headers(): splits the stored blob on '\n', then each line
// on its first ':' into a name/value pair. Lines without a colon and headers
// exempted from storage are silently dropped.
static NonnullRefPtr<HeaderList> deserialize_headers(StringView serialized_headers)
{
    auto result = HeaderList::create();
    serialized_headers.for_each_split_view('\n', SplitBehavior::Nothing, [&](StringView line) {
        auto colon_index = line.find(':');
        if (!colon_index.has_value())
            return;
        auto header_name = line.substring_view(0, *colon_index);
        if (is_header_exempted_from_storage(header_name))
            return;
        auto header_value = line.substring_view(*colon_index + 1);
        result->append({ header_name, header_value });
    });
    return result;
}
// Opens (or re-creates) the on-disk cache index backed by `database`.
//
// Responsibilities:
// - Ensure the CacheMetadata table exists and matches CACHE_VERSION; on a
//   version mismatch, drop the CacheIndex table and store the new version.
// - Create the CacheIndex table and prepare every SQL statement used later.
// - Derive cache size limits from the free disk space under `cache_directory`.
// - Seed the running size total (m_total_estimated_size) from the table so
//   eviction can short-circuit with a single compare instead of a DB query.
ErrorOr<CacheIndex> CacheIndex::create(Database::Database& database, LexicalPath const& cache_directory)
{
    auto create_cache_metadata_table = TRY(database.prepare_statement(R"#(
CREATE TABLE IF NOT EXISTS CacheMetadata (
metadata_key INTEGER,
version INTEGER,
PRIMARY KEY(metadata_key)
);
)#"sv));
    database.execute_statement(create_cache_metadata_table, {});

    // Read the stored schema version; it stays 0 when the metadata row is
    // absent (fresh cache directory).
    auto read_cache_version = TRY(database.prepare_statement("SELECT version FROM CacheMetadata WHERE metadata_key = ?;"sv));
    auto cache_version = 0u;
    database.execute_statement(
        read_cache_version,
        [&](auto statement_id) { cache_version = database.result_column<u32>(statement_id, 0); },
        CACHE_METADATA_KEY);

    if (cache_version != CACHE_VERSION) {
        if (cache_version != 0)
            dbgln_if(HTTP_DISK_CACHE_DEBUG, "\033[36m[disk]\033[0m \033[31;1mDisk cache version mismatch:\033[0m stored version = {}, new version = {}", cache_version, CACHE_VERSION);

        // FIXME: We should more elegantly handle minor changes, i.e. use ALTER TABLE to add fields to CacheIndex.
        auto delete_cache_index_table = TRY(database.prepare_statement("DROP TABLE IF EXISTS CacheIndex;"sv));
        database.execute_statement(delete_cache_index_table, {});

        auto set_cache_version = TRY(database.prepare_statement("INSERT OR REPLACE INTO CacheMetadata VALUES (?, ?);"sv));
        database.execute_statement(set_cache_version, {}, CACHE_METADATA_KEY, CACHE_VERSION);
    }

    auto create_cache_index_table = TRY(database.prepare_statement(R"#(
CREATE TABLE IF NOT EXISTS CacheIndex (
cache_key INTEGER,
vary_key INTEGER,
url TEXT,
request_headers BLOB,
response_headers BLOB,
data_size INTEGER,
request_time INTEGER,
response_time INTEGER,
last_access_time INTEGER,
PRIMARY KEY(cache_key, vary_key)
);
)#"sv));
    database.execute_statement(create_cache_index_table, {});

    Statements statements {};
    statements.insert_entry = TRY(database.prepare_statement("INSERT OR REPLACE INTO CacheIndex VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?);"sv));

    // The DELETE statements all RETURN each removed row's estimated footprint
    // (data plus serialized header bytes) so m_total_estimated_size can be
    // kept accurate even for rows that were never loaded into m_entries.
    statements.remove_entry = TRY(database.prepare_statement(R"#(
DELETE FROM CacheIndex
WHERE cache_key = ? AND vary_key = ?
RETURNING data_size + OCTET_LENGTH(request_headers) + OCTET_LENGTH(response_headers);
)#"sv));
    statements.remove_entries_accessed_since = TRY(database.prepare_statement(R"#(
DELETE FROM CacheIndex
WHERE last_access_time >= ?
RETURNING cache_key, vary_key, data_size + OCTET_LENGTH(request_headers) + OCTET_LENGTH(response_headers);
)#"sv));
    statements.select_entries = TRY(database.prepare_statement("SELECT * FROM CacheIndex WHERE cache_key = ?;"sv));
    statements.update_response_headers = TRY(database.prepare_statement("UPDATE CacheIndex SET response_headers = ? WHERE cache_key = ? AND vary_key = ?;"sv));
    statements.update_last_access_time = TRY(database.prepare_statement("UPDATE CacheIndex SET last_access_time = ? WHERE cache_key = ? AND vary_key = ?;"sv));

    // Ranks rows most-recently-accessed first, accumulates their sizes with a
    // window function, and deletes every row whose cumulative size exceeds
    // the bound parameter — i.e. evicts the least recently used rows first.
    statements.remove_entries_exceeding_cache_limit = TRY(database.prepare_statement(R"#(
WITH RankedCacheIndex AS (
SELECT
cache_key,
vary_key,
SUM(data_size + OCTET_LENGTH(request_headers) + OCTET_LENGTH(response_headers))
OVER (ORDER BY last_access_time DESC)
AS cumulative_estimated_size
FROM CacheIndex
)
DELETE FROM CacheIndex
WHERE (cache_key, vary_key) IN (
SELECT cache_key, vary_key
FROM RankedCacheIndex
WHERE cumulative_estimated_size > ?
)
RETURNING cache_key, vary_key, data_size + OCTET_LENGTH(request_headers) + OCTET_LENGTH(response_headers);
)#"sv));

    // COALESCE: SUM over an empty table yields NULL, which we map to 0 so
    // callers never have to special-case it.
    statements.estimate_cache_size_accessed_since = TRY(database.prepare_statement(R"#(
SELECT COALESCE(SUM(data_size + OCTET_LENGTH(request_headers) + OCTET_LENGTH(response_headers)), 0)
FROM CacheIndex
WHERE last_access_time >= ?;
)#"sv));
    statements.select_total_estimated_size = TRY(database.prepare_statement(R"#(
SELECT COALESCE(SUM(data_size + OCTET_LENGTH(request_headers) + OCTET_LENGTH(response_headers)), 0)
FROM CacheIndex;
)#"sv));

    auto disk_space = TRY(FileSystem::compute_disk_space(cache_directory));
    auto maximum_disk_cache_size = compute_maximum_disk_cache_size(disk_space.free_bytes);

    Limits limits {
        .free_disk_space = disk_space.free_bytes,
        .maximum_disk_cache_size = maximum_disk_cache_size,
        .maximum_disk_cache_entry_size = compute_maximum_disk_cache_entry_size(maximum_disk_cache_size),
    };

    // Seed the running total from whatever the index already contains; from
    // here on it is maintained incrementally by the mutation paths.
    u64 total_estimated_size { 0 };
    database.execute_statement(
        statements.select_total_estimated_size,
        [&](auto statement_id) { total_estimated_size = database.result_column<u64>(statement_id, 0); });

    return CacheIndex { database, statements, limits, total_estimated_size };
}
// Constructed via CacheIndex::create(), which prepares the statements,
// derives the limits, and seeds the running size total.
CacheIndex::CacheIndex(Database::Database& database, Statements statements, Limits limits, u64 total_estimated_size)
    : m_database(database)
    , m_statements(statements)
    , m_limits(limits)
    , m_total_estimated_size(total_estimated_size)
{
}
// Inserts (or replaces) the index row for (cache_key, vary_key) and mirrors
// it into the in-memory m_entries map.
//
// Size bookkeeping: any row this insert displaces is first removed via the
// DELETE ... RETURNING statement, subtracting its footprint from
// m_total_estimated_size; the new entry's estimated size is added at the end.
//
// Returns an error (and stores nothing) when the entry alone would exceed
// the per-entry size limit.
ErrorOr<void> CacheIndex::create_entry(u64 cache_key, u64 vary_key, String url, NonnullRefPtr<HeaderList> request_headers, NonnullRefPtr<HeaderList> response_headers, u64 data_size, UnixDateTime request_time, UnixDateTime response_time)
{
    auto now = UnixDateTime::now();

    // Strip headers exempted from storage before serialization so they never
    // reach the database.
    auto remove_exempted_headers = [](HeaderList& headers) {
        headers.delete_all_matching([&](auto const& header) {
            return is_header_exempted_from_storage(header.name);
        });
    };
    remove_exempted_headers(request_headers);
    remove_exempted_headers(response_headers);

    auto serialized_request_headers = serialize_headers(request_headers);
    auto serialized_response_headers = serialize_headers(response_headers);

    Entry entry {
        .vary_key = vary_key,
        .url = move(url),
        .request_headers = move(request_headers),
        .response_headers = move(response_headers),
        .data_size = data_size,
        // Cache the serialized sizes so estimated_size() and later header
        // updates don't need to re-serialize.
        .serialized_request_headers_size = static_cast<u64>(serialized_request_headers.length()),
        .serialized_response_headers_size = static_cast<u64>(serialized_response_headers.length()),
        .request_time = request_time,
        .response_time = response_time,
        .last_access_time = now,
    };

    auto entry_size = entry.estimated_size();
    if (entry_size > m_limits.maximum_disk_cache_entry_size)
        return Error::from_string_literal("Cache entry size exceeds allowed maximum");

    // Remove any row we are about to displace; RETURNING reports its size so
    // the running total stays accurate even for rows not loaded in m_entries.
    m_database->execute_statement(
        m_statements.remove_entry,
        [&](auto statement_id) {
            auto removed_size = m_database->result_column<u64>(statement_id, 0);
            m_total_estimated_size -= removed_size;
        },
        cache_key,
        vary_key);

    auto& entries = m_entries.ensure(cache_key);
    auto existing_entry_index = entries.find_first_index_if([&](auto const& existing_entry) {
        return existing_entry.vary_key == vary_key;
    });

    m_database->execute_statement(m_statements.insert_entry, {}, cache_key, vary_key, entry.url, serialized_request_headers, serialized_response_headers, entry.data_size, entry.request_time, entry.response_time, entry.last_access_time);

    // Keep the in-memory mirror consistent with the row we just wrote.
    if (existing_entry_index.has_value())
        entries[*existing_entry_index] = move(entry);
    else
        entries.append(move(entry));

    m_total_estimated_size += entry_size;
    return {};
}
// Deletes the (cache_key, vary_key) row from the database and drops any
// cached in-memory copy. The DELETE's RETURNING clause reports the removed
// row's estimated footprint so m_total_estimated_size stays accurate even
// for rows that were never loaded into m_entries.
void CacheIndex::remove_entry(u64 cache_key, u64 vary_key)
{
    auto on_removed_row = [&](auto statement_id) {
        m_total_estimated_size -= m_database->result_column<u64>(statement_id, 0);
    };
    m_database->execute_statement(m_statements.remove_entry, move(on_removed_row), cache_key, vary_key);
    delete_entry(cache_key, vary_key);
}
// Evicts least-recently-used entries until the cache fits within its size
// limit, invoking `on_entry_removed` for each evicted entry so callers can
// delete the corresponding data files.
void CacheIndex::remove_entries_exceeding_cache_limit(Function<void(u64 cache_key, u64 vary_key)> on_entry_removed)
{
    // Fast path: while the running total is within budget there is nothing
    // to evict, so skip the window-function SQL entirely.
    if (m_total_estimated_size <= m_limits.maximum_disk_cache_size)
        return;

    m_database->execute_statement(
        m_statements.remove_entries_exceeding_cache_limit,
        [&](auto statement_id) {
            auto removed_cache_key = m_database->result_column<u64>(statement_id, 0);
            auto removed_vary_key = m_database->result_column<u64>(statement_id, 1);
            m_total_estimated_size -= m_database->result_column<u64>(statement_id, 2);

            delete_entry(removed_cache_key, removed_vary_key);
            if (on_entry_removed)
                on_entry_removed(removed_cache_key, removed_vary_key);
        },
        m_limits.maximum_disk_cache_size);
}
// Bulk-removes every entry whose last access time is at or after `since`
// (e.g. "clear browsing data since <time>"). The RETURNING clause yields
// each removed row's key pair and estimated size so both the in-memory index
// and m_total_estimated_size stay in sync; `on_entry_removed` (if provided)
// lets callers delete the associated data files.
void CacheIndex::remove_entries_accessed_since(UnixDateTime since, Function<void(u64 cache_key, u64 vary_key)> on_entry_removed)
{
    m_database->execute_statement(
        m_statements.remove_entries_accessed_since,
        [&](auto statement_id) {
            auto removed_cache_key = m_database->result_column<u64>(statement_id, 0);
            auto removed_vary_key = m_database->result_column<u64>(statement_id, 1);
            m_total_estimated_size -= m_database->result_column<u64>(statement_id, 2);

            delete_entry(removed_cache_key, removed_vary_key);
            if (on_entry_removed)
                on_entry_removed(removed_cache_key, removed_vary_key);
        },
        since);
}
// Replaces the stored response headers for (cache_key, vary_key), e.g. after
// a revalidation. No-op when the entry is not present in the in-memory index.
//
// m_total_estimated_size is shifted by the signed delta between the old and
// new serialized header sizes so the running total matches the DB contents.
void CacheIndex::update_response_headers(u64 cache_key, u64 vary_key, NonnullRefPtr<HeaderList> response_headers)
{
    auto entry = get_entry(cache_key, vary_key);
    if (!entry.has_value())
        return;

    // Strip headers exempted from storage before serialization, matching
    // create_entry(); otherwise this path would persist them to disk and
    // rely on deserialize_headers() to filter them on every load.
    response_headers->delete_all_matching([](auto const& header) {
        return is_header_exempted_from_storage(header.name);
    });

    auto serialized_response_headers = serialize_headers(response_headers);
    auto serialized_response_headers_size = static_cast<u64>(serialized_response_headers.length());

    m_database->execute_statement(m_statements.update_response_headers, {}, serialized_response_headers, cache_key, vary_key);

    m_total_estimated_size -= entry->serialized_response_headers_size;
    m_total_estimated_size += serialized_response_headers_size;

    entry->response_headers = move(response_headers);
    entry->serialized_response_headers_size = serialized_response_headers_size;
}
// Touches the entry's last access time (used for LRU eviction ordering) in
// both the database and the in-memory mirror. No-op when the entry is not in
// the in-memory index.
void CacheIndex::update_last_access_time(u64 cache_key, u64 vary_key)
{
    auto entry = get_entry(cache_key, vary_key);
    if (!entry.has_value())
        return;

    auto access_time = UnixDateTime::now();
    m_database->execute_statement(m_statements.update_last_access_time, {}, access_time, cache_key, vary_key);
    entry->last_access_time = access_time;
}
// Finds the cached entry for `cache_key` whose Vary key matches the incoming
// `request_headers`, lazily loading all rows for that key from the database
// into m_entries on first access.
Optional<CacheIndex::Entry const&> CacheIndex::find_entry(u64 cache_key, HeaderList const& request_headers)
{
    auto& entries = m_entries.ensure(cache_key, [&]() {
        Vector<Entry> entries;

        m_database->execute_statement(
            m_statements.select_entries,
            [&](auto statement_id) {
                int column = 1; // Skip the cache_key column.

                auto vary_key = m_database->result_column<u64>(statement_id, column++);
                auto url = m_database->result_column<String>(statement_id, column++);
                auto request_headers = m_database->result_column<ByteString>(statement_id, column++);
                auto response_headers = m_database->result_column<ByteString>(statement_id, column++);
                auto data_size = m_database->result_column<u64>(statement_id, column++);
                auto request_time = m_database->result_column<UnixDateTime>(statement_id, column++);
                auto response_time = m_database->result_column<UnixDateTime>(statement_id, column++);
                auto last_access_time = m_database->result_column<UnixDateTime>(statement_id, column++);

                // NOTE: Argument order must match the Entry field declaration
                // order. The serialized sizes are taken from the raw blob
                // lengths, matching what OCTET_LENGTH reports in the SQL.
                entries.empend(vary_key, move(url), deserialize_headers(request_headers), deserialize_headers(response_headers), data_size, request_headers.length(), response_headers.length(), request_time, response_time, last_access_time);
            },
            cache_key);

        return entries;
    });

    // An entry matches when the Vary key derived from the incoming request
    // headers and the stored response headers equals the stored vary_key.
    return find_value(entries, [&](auto const& entry) {
        return create_vary_key(request_headers, entry.response_headers) == entry.vary_key;
    });
}
// Returns a mutable reference to the in-memory entry for (cache_key,
// vary_key), or an empty Optional when the key pair isn't cached in
// m_entries. Unlike find_entry(), this never touches the database.
Optional<CacheIndex::Entry&> CacheIndex::get_entry(u64 cache_key, u64 vary_key)
{
    if (auto entries = m_entries.get(cache_key); entries.has_value())
        return find_value(*entries, [&](auto const& entry) { return entry.vary_key == vary_key; });
    return {};
}
// Removes the (cache_key, vary_key) entry from the in-memory mirror only;
// callers are responsible for the corresponding database row.
void CacheIndex::delete_entry(u64 cache_key, u64 vary_key)
{
    auto cached_entries = m_entries.get(cache_key);
    if (!cached_entries.has_value())
        return;

    cached_entries->remove_first_matching([&](auto const& entry) { return entry.vary_key == vary_key; });

    // Drop the bucket once its last entry is gone so m_entries doesn't
    // accumulate empty vectors.
    if (cached_entries->is_empty())
        m_entries.remove(cache_key);
}
// Reports the estimated size of entries accessed since `since`, alongside
// the total estimated cache size.
Requests::CacheSizes CacheIndex::estimate_cache_size_accessed_since(UnixDateTime since)
{
    Requests::CacheSizes sizes;

    // The statement COALESCEs an empty table's SUM to 0, so the result is
    // always a valid number.
    m_database->execute_statement(
        m_statements.estimate_cache_size_accessed_since,
        [&](auto statement_id) { sizes.since_requested_time = m_database->result_column<u64>(statement_id, 0); },
        since);

    // The total is already maintained as a running tally by every mutation
    // path, so avoid re-running a second SUM over the entire table.
    sizes.total = m_total_estimated_size;

    return sizes;
}
// Applies a newly requested maximum cache size, clamping it against the
// available disk space and re-deriving the per-entry limit from the result.
//
// NOTE(review): the early return compares the requested value against the
// stored limit, but the stored limit is the output of
// compute_maximum_disk_cache_size(), which may have clamped an earlier
// request. A request equal to a previously clamped limit therefore skips
// recomputation — confirm this is intended.
void CacheIndex::set_maximum_disk_cache_size(u64 maximum_disk_cache_size)
{
    if (maximum_disk_cache_size == m_limits.maximum_disk_cache_size)
        return;
    m_limits.maximum_disk_cache_size = compute_maximum_disk_cache_size(m_limits.free_disk_space, maximum_disk_cache_size);
    m_limits.maximum_disk_cache_entry_size = compute_maximum_disk_cache_entry_size(m_limits.maximum_disk_cache_size);
}
}