RequestServer: Implement stale cache revalidation

When a request becomes stale, we will now issue a revalidation request
(if the response indicates it may be revalidated). We do this by issuing
a normal fetch request, with If-None-Match and/or If-Modified-Since
request headers.

If the server replies with an HTTP 304 status, we update the stored
response headers to match the 304's headers, and serve the response to
the client from the cache.

If the server replies with any other code, we remove the cache entry.
We will open a new cache entry to cache the new response, if possible.
This commit is contained in:
Timothy Flynn 2025-10-28 17:09:35 -04:00 committed by Tim Flynn
parent 3d45a209b6
commit a4e3890c05
Notes: github-actions[bot] 2025-11-02 18:04:24 +00:00
9 changed files with 263 additions and 65 deletions

View file

@ -121,7 +121,12 @@ ErrorOr<void> CacheEntryWriter::write_status_and_reason(u32 status_code, Optiona
if (!is_cacheable(status_code, response_headers))
return Error::from_string_literal("Response is not cacheable");
if (auto freshness = calculate_freshness_lifetime(response_headers); freshness.is_negative() || freshness.is_zero())
auto freshness_lifetime = calculate_freshness_lifetime(response_headers);
auto current_age = calculate_age(response_headers, m_request_time, m_response_time);
// We can cache already-expired responses if there are other cache directives that allow us to revalidate the
// response on subsequent requests. For example, `Cache-Control: max-age=0, must-revalidate`.
if (cache_lifetime_status(response_headers, freshness_lifetime, current_age) == CacheLifetimeStatus::Expired)
return Error::from_string_literal("Response has already expired");
TRY(m_file->write_value(m_cache_header));
@ -240,6 +245,22 @@ CacheEntryReader::CacheEntryReader(DiskCache& disk_cache, CacheIndex& index, u64
{
}
void CacheEntryReader::revalidation_succeeded(HTTP::HeaderMap const& response_headers)
{
dbgln("\033[34;1mCache revalidation succeeded for\033[0m {}", m_url);
update_header_fields(m_response_headers, response_headers);
m_index.update_response_headers(m_cache_key, m_response_headers);
}
void CacheEntryReader::revalidation_failed()
{
dbgln("\033[33;1mCache revalidation failed for\033[0m {}", m_url);
remove();
close_and_destroy_cache_entry();
}
void CacheEntryReader::pipe_to(int pipe_fd, Function<void(u64)> on_complete, Function<void(u64)> on_error)
{
VERIFY(m_pipe_fd == -1);

View file

@ -100,6 +100,12 @@ public:
static ErrorOr<NonnullOwnPtr<CacheEntryReader>> create(DiskCache&, CacheIndex&, u64 cache_key, HTTP::HeaderMap, u64 data_size);
virtual ~CacheEntryReader() override = default;
bool must_revalidate() const { return m_must_revalidate; }
void set_must_revalidate() { m_must_revalidate = true; }
void revalidation_succeeded(HTTP::HeaderMap const&);
void revalidation_failed();
void pipe_to(int pipe_fd, Function<void(u64 bytes_piped)> on_complete, Function<void(u64 bytes_piped)> on_error);
u32 status_code() const { return m_cache_header.status_code; }
@ -128,6 +134,8 @@ private:
Optional<String> m_reason_phrase;
HTTP::HeaderMap m_response_headers;
bool m_must_revalidate { false };
u64 const m_data_offset { 0 };
u64 const m_data_size { 0 };
};

View file

@ -99,6 +99,7 @@ ErrorOr<CacheIndex> CacheIndex::create(Database::Database& database)
statements.remove_entry = TRY(database.prepare_statement("DELETE FROM CacheIndex WHERE cache_key = ?;"sv));
statements.remove_all_entries = TRY(database.prepare_statement("DELETE FROM CacheIndex;"sv));
statements.select_entry = TRY(database.prepare_statement("SELECT * FROM CacheIndex WHERE cache_key = ?;"sv));
statements.update_response_headers = TRY(database.prepare_statement("UPDATE CacheIndex SET response_headers = ? WHERE cache_key = ?;"sv));
statements.update_last_access_time = TRY(database.prepare_statement("UPDATE CacheIndex SET last_access_time = ? WHERE cache_key = ?;"sv));
return CacheIndex { database, statements };
@ -140,6 +141,16 @@ void CacheIndex::remove_all_entries()
m_entries.clear();
}
void CacheIndex::update_response_headers(u64 cache_key, HTTP::HeaderMap response_headers)
{
auto entry = m_entries.get(cache_key);
if (!entry.has_value())
return;
m_database.execute_statement(m_statements.update_response_headers, {}, serialize_headers(response_headers), cache_key);
entry->response_headers = move(response_headers);
}
void CacheIndex::update_last_access_time(u64 cache_key)
{
auto entry = m_entries.get(cache_key);

View file

@ -39,6 +39,7 @@ public:
Optional<Entry&> find_entry(u64 cache_key);
void update_response_headers(u64 cache_key, HTTP::HeaderMap);
void update_last_access_time(u64 cache_key);
private:
@ -47,6 +48,7 @@ private:
Database::StatementID remove_entry { 0 };
Database::StatementID remove_all_entries { 0 };
Database::StatementID select_entry { 0 };
Database::StatementID update_response_headers { 0 };
Database::StatementID update_last_access_time { 0 };
};

View file

@ -83,17 +83,30 @@ Variant<Optional<CacheEntryReader&>, DiskCache::CacheHasOpenEntry> DiskCache::op
return Optional<CacheEntryReader&> {};
}
auto freshness_lifetime = calculate_freshness_lifetime(cache_entry.value()->response_headers());
auto current_age = calculate_age(cache_entry.value()->response_headers(), index_entry->request_time, index_entry->response_time);
auto const& response_headers = cache_entry.value()->response_headers();
auto freshness_lifetime = calculate_freshness_lifetime(response_headers);
auto current_age = calculate_age(response_headers, index_entry->request_time, index_entry->response_time);
if (!is_response_fresh(freshness_lifetime, current_age)) {
switch (cache_lifetime_status(response_headers, freshness_lifetime, current_age)) {
case CacheLifetimeStatus::Fresh:
dbgln("\033[32;1mOpened disk cache entry for\033[0m {} (lifetime={}s age={}s) ({} bytes)", request.url(), freshness_lifetime.to_seconds(), current_age.to_seconds(), index_entry->data_size);
break;
case CacheLifetimeStatus::Expired:
dbgln("\033[33;1mCache entry expired for\033[0m {} (lifetime={}s age={}s)", request.url(), freshness_lifetime.to_seconds(), current_age.to_seconds());
cache_entry.value()->remove();
return Optional<CacheEntryReader&> {};
}
dbgln("\033[32;1mOpened disk cache entry for\033[0m {} (lifetime={}s age={}s) ({} bytes)", request.url(), freshness_lifetime.to_seconds(), current_age.to_seconds(), index_entry->data_size);
case CacheLifetimeStatus::MustRevalidate:
// We will hold an exclusive lock on the cache entry for revalidation requests.
if (check_if_cache_has_open_entry(request, cache_key, CheckReaderEntries::Yes))
return Optional<CacheEntryReader&> {};
dbgln("\033[36;1mMust revalidate disk cache entry for\033[0m {} (lifetime={}s age={}s)", request.url(), freshness_lifetime.to_seconds(), current_age.to_seconds());
cache_entry.value()->set_must_revalidate();
break;
}
auto* cache_entry_pointer = cache_entry.value().ptr();
m_open_cache_entries.ensure(cache_key).append(cache_entry.release_value());
@ -113,14 +126,17 @@ bool DiskCache::check_if_cache_has_open_entry(Request& request, u64 cache_key, C
m_requests_waiting_completion.ensure(cache_key).append(request);
return true;
}
// We allow concurrent readers unless another reader is open for revalidation. That reader will issue the network
// request, which may then result in the cache entry being updated or deleted.
if (check_reader_entries == CheckReaderEntries::Yes || as<CacheEntryReader>(*open_entry).must_revalidate()) {
dbgln("\033[36;1mDeferring disk cache entry for\033[0m {} (waiting for existing reader)", request.url());
m_requests_waiting_completion.ensure(cache_key).append(request);
return true;
}
}
if (check_reader_entries == CheckReaderEntries::No)
return false;
dbgln("\033[36;1mDeferring disk cache entry for\033[0m {} (waiting for existing reader)", request.url());
m_requests_waiting_completion.ensure(cache_key).append(request);
return true;
return false;
}
void DiskCache::clear_cache()

View file

@ -114,12 +114,6 @@ bool is_cacheable(u32 status_code, HTTP::HeaderMap const& headers)
// - a cache extension that allows it to be cached (see Section 5.2.3); or
// - a status code that is defined as heuristically cacheable (see Section 4.2.2).
// FIXME: Implement cache revalidation.
if (cache_control->contains("no-cache"sv, CaseSensitivity::CaseInsensitive))
return false;
if (cache_control->contains("revalidate"sv, CaseSensitivity::CaseInsensitive))
return false;
return true;
}
@ -216,10 +210,84 @@ AK::Duration calculate_age(HTTP::HeaderMap const& headers, UnixDateTime request_
return AK::Duration::from_seconds(current_age);
}
// https://httpwg.org/specs/rfc9111.html#expiration.model
bool is_response_fresh(AK::Duration freshness_lifetime, AK::Duration current_age)
CacheLifetimeStatus cache_lifetime_status(HTTP::HeaderMap const& headers, AK::Duration freshness_lifetime, AK::Duration current_age)
{
return freshness_lifetime > current_age;
auto revalidation_status = [&]() {
// In order to revalidate a cache entry, we must have one of these headers to attach to the revalidation request.
if (headers.contains("Last-Modified"sv) || headers.contains("ETag"sv))
return CacheLifetimeStatus::MustRevalidate;
return CacheLifetimeStatus::Expired;
};
auto cache_control = headers.get("Cache-Control"sv);
// https://httpwg.org/specs/rfc9111.html#cache-response-directive.no-cache
// The no-cache response directive, in its unqualified form (without an argument), indicates that the response MUST
// NOT be used to satisfy any other request without forwarding it for validation and receiving a successful response
//
// FIXME: Handle the qualified form of the no-cache directive, which may allow us to re-use the response.
if (cache_control.has_value() && cache_control->contains("no-cache"sv, CaseSensitivity::CaseInsensitive))
return revalidation_status();
// https://httpwg.org/specs/rfc9111.html#expiration.model
if (freshness_lifetime > current_age)
return CacheLifetimeStatus::Fresh;
if (cache_control.has_value()) {
// https://httpwg.org/specs/rfc9111.html#cache-response-directive.must-revalidate
// The must-revalidate response directive indicates that once the response has become stale, a cache MUST NOT
// reuse that response to satisfy another request until it has been successfully validated by the origin
if (cache_control->contains("must-revalidate"sv, CaseSensitivity::CaseInsensitive))
return revalidation_status();
// FIXME: Implement stale-while-revalidate.
}
return CacheLifetimeStatus::Expired;
}
// https://httpwg.org/specs/rfc9111.html#validation.sent
RevalidationAttributes RevalidationAttributes::create(HTTP::HeaderMap const& headers)
{
RevalidationAttributes attributes;
attributes.etag = headers.get("ETag"sv).map([](auto const& etag) { return etag; });
attributes.last_modified = parse_http_date(headers.get("Last-Modified"sv));
return attributes;
}
// https://httpwg.org/specs/rfc9111.html#update
void update_header_fields(HTTP::HeaderMap& stored_headers, HTTP::HeaderMap const& updated_headers)
{
// Caches are required to update a stored response's header fields from another (typically newer) response in
// several situations; for example, see Sections 3.4, 4.3.4, and 4.3.5.
// When doing so, the cache MUST add each header field in the provided response to the stored response, replacing
// field values that are already present, with the following exceptions:
auto is_header_exempted_from_update = [](StringView name) {
// * Header fields excepted from storage in Section 3.1,
if (is_header_exempted_from_storage(name))
return true;
// * Header fields that the cache's stored response depends upon, as described below,
// * Header fields that are automatically processed and removed by the recipient, as described below, and
// * The Content-Length header field.
if (name.equals_ignoring_ascii_case("Content-Type"sv))
return true;
return false;
};
for (auto const& updated_header : updated_headers.headers()) {
if (!is_header_exempted_from_update(updated_header.name))
stored_headers.remove(updated_header.name);
}
for (auto const& updated_header : updated_headers.headers()) {
if (!is_header_exempted_from_update(updated_header.name))
stored_headers.set(updated_header.name, updated_header.value);
}
}
}

View file

@ -23,6 +23,21 @@ bool is_header_exempted_from_storage(StringView name);
AK::Duration calculate_freshness_lifetime(HTTP::HeaderMap const&);
AK::Duration calculate_age(HTTP::HeaderMap const&, UnixDateTime request_time, UnixDateTime response_time);
bool is_response_fresh(AK::Duration freshness_lifetime, AK::Duration current_age);
enum class CacheLifetimeStatus {
Fresh,
Expired,
MustRevalidate,
};
CacheLifetimeStatus cache_lifetime_status(HTTP::HeaderMap const&, AK::Duration freshness_lifetime, AK::Duration current_age);
struct RevalidationAttributes {
static RevalidationAttributes create(HTTP::HeaderMap const&);
Optional<ByteString> etag;
Optional<UnixDateTime> last_modified;
};
void update_header_fields(HTTP::HeaderMap&, HTTP::HeaderMap const&);
}

View file

@ -10,6 +10,7 @@
#include <LibTextCodec/Decoder.h>
#include <RequestServer/CURL.h>
#include <RequestServer/Cache/DiskCache.h>
#include <RequestServer/Cache/Utilities.h>
#include <RequestServer/ConnectionFromClient.h>
#include <RequestServer/Request.h>
#include <RequestServer/Resolver.h>
@ -129,6 +130,19 @@ void Request::notify_request_unblocked(Badge<DiskCache>)
void Request::notify_fetch_complete(Badge<ConnectionFromClient>, int result_code)
{
if (m_cache_entry_reader.has_value() && m_cache_entry_reader->must_revalidate()) {
if (acquire_status_code() == 304) {
m_cache_entry_reader->revalidation_succeeded(m_response_headers);
transition_to_state(State::ReadCache);
return;
}
if (revalidation_failed().is_error())
return;
transfer_headers_to_client_if_needed();
}
m_curl_result_code = result_code;
if (m_response_buffer.is_eof())
@ -177,8 +191,13 @@ void Request::handle_initial_state()
m_disk_cache->open_entry(*this).visit(
[&](Optional<CacheEntryReader&> cache_entry_reader) {
m_cache_entry_reader = cache_entry_reader;
if (m_cache_entry_reader.has_value())
transition_to_state(State::ReadCache);
if (m_cache_entry_reader.has_value()) {
if (m_cache_entry_reader->must_revalidate())
transition_to_state(State::DNSLookup);
else
transition_to_state(State::ReadCache);
}
},
[&](DiskCache::CacheHasOpenEntry) {
// If an existing entry is open for writing, we must wait for it to complete.
@ -208,31 +227,18 @@ void Request::handle_initial_state()
void Request::handle_read_cache_state()
{
#if defined(AK_OS_WINDOWS)
dbgln("FIXME: Request::handle_read_from_cache_state: Not implemented on Windows");
transition_to_state(State::Error);
#else
m_status_code = m_cache_entry_reader->status_code();
m_reason_phrase = m_cache_entry_reader->reason_phrase();
m_response_headers = m_cache_entry_reader->response_headers();
auto pipe_or_error = RequestPipe::create();
if (pipe_or_error.is_error()) {
dbgln("Request::handle_read_from_cache_state: Failed to create pipe: {}", pipe_or_error.error());
transition_to_state(State::Error);
if (inform_client_request_started().is_error())
return;
}
auto pipe = pipe_or_error.release_value();
m_client.async_request_started(m_request_id, IPC::File::adopt_fd(pipe.reader_fd()));
m_client_request_pipe = move(pipe);
m_client.async_headers_became_available(m_request_id, m_response_headers, m_status_code, m_reason_phrase);
m_sent_response_headers_to_client = true;
m_cache_entry_reader->pipe_to(
m_client_request_pipe.value().writer_fd(),
m_client_request_pipe->writer_fd(),
[this](auto bytes_sent) {
m_bytes_transferred_to_client = bytes_sent;
m_curl_result_code = CURLE_OK;
@ -246,7 +252,6 @@ void Request::handle_read_cache_state()
transition_to_state(State::DNSLookup);
});
#endif
}
void Request::handle_dns_lookup_state()
@ -308,28 +313,13 @@ void Request::handle_fetch_state()
return;
}
if (!m_start_offset_of_response_resumed_from_cache.has_value()) {
auto pipe_or_error = RequestPipe::create();
if (pipe_or_error.is_error()) {
dbgln("Request::handle_start_fetch_state: Failed to create pipe: {}", pipe_or_error.error());
transition_to_state(State::Error);
auto is_revalidation_request = m_cache_entry_reader.has_value() && m_cache_entry_reader->must_revalidate();
if (!m_start_offset_of_response_resumed_from_cache.has_value() && !is_revalidation_request) {
if (inform_client_request_started().is_error())
return;
}
auto pipe = pipe_or_error.release_value();
m_client.async_request_started(m_request_id, IPC::File::adopt_fd(pipe.reader_fd()));
m_client_request_pipe = move(pipe);
}
m_client_writer_notifier = Core::Notifier::construct(m_client_request_pipe.value().writer_fd(), Core::NotificationType::Write);
m_client_writer_notifier->set_enabled(false);
m_client_writer_notifier->on_activation = [this] {
if (auto result = write_queued_bytes_without_blocking(); result.is_error())
dbgln("Warning: Failed to write buffered request data (it's likely the client disappeared): {}", result.error());
};
auto set_option = [&](auto option, auto value) {
if (auto result = curl_easy_setopt(m_curl_easy_handle, option, value); result != CURLE_OK)
dbgln("Request::handle_start_fetch_state: Failed to set curl option: {}", curl_easy_strerror(result));
@ -383,16 +373,30 @@ void Request::handle_fetch_state()
}
}
if (is_revalidation_request) {
auto revalidation_attributes = RevalidationAttributes::create(m_cache_entry_reader->response_headers());
VERIFY(revalidation_attributes.etag.has_value() || revalidation_attributes.last_modified.has_value());
if (revalidation_attributes.etag.has_value()) {
// There is no CURLOPT for If-None-Match, so we must set the header value directly.
auto header_string = ByteString::formatted("If-None-Match: {}", *revalidation_attributes.etag);
curl_headers = curl_slist_append(curl_headers, header_string.characters());
}
if (revalidation_attributes.last_modified.has_value()) {
set_option(CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
set_option(CURLOPT_TIMEVALUE, revalidation_attributes.last_modified->seconds_since_epoch());
}
} else if (m_start_offset_of_response_resumed_from_cache.has_value()) {
auto range = ByteString::formatted("{}-", *m_start_offset_of_response_resumed_from_cache);
set_option(CURLOPT_RANGE, range.characters());
}
if (curl_headers) {
set_option(CURLOPT_HTTPHEADER, curl_headers);
m_curl_string_lists.append(curl_headers);
}
if (m_start_offset_of_response_resumed_from_cache.has_value()) {
auto range = ByteString::formatted("{}-", *m_start_offset_of_response_resumed_from_cache);
set_option(CURLOPT_RANGE, range.characters());
}
// FIXME: Set up proxy if applicable
(void)m_proxy_data;
@ -493,6 +497,23 @@ size_t Request::on_header_received(void* buffer, size_t size, size_t nmemb, void
size_t Request::on_data_received(void* buffer, size_t size, size_t nmemb, void* user_data)
{
auto& request = *static_cast<Request*>(user_data);
if (request.m_cache_entry_reader.has_value() && request.m_cache_entry_reader->must_revalidate()) {
// If we arrive here, we did not receive an HTTP 304 response code. We must remove the cache entry and inform
// the client of the new response headers and data.
if (request.revalidation_failed().is_error())
return CURL_WRITEFUNC_ERROR;
request.m_disk_cache->create_entry(request).visit(
[&](Optional<CacheEntryWriter&> cache_entry_writer) {
request.m_cache_entry_writer = cache_entry_writer;
},
[&](DiskCache::CacheHasOpenEntry) {
// This should not be reachable, as cache revalidation holds an exclusive lock on the cache entry.
VERIFY_NOT_REACHED();
});
}
request.transfer_headers_to_client_if_needed();
auto total_size = size * nmemb;
@ -511,6 +532,21 @@ size_t Request::on_data_received(void* buffer, size_t size, size_t nmemb, void*
return total_size;
}
ErrorOr<void> Request::inform_client_request_started()
{
auto request_pipe = RequestPipe::create();
if (request_pipe.is_error()) {
dbgln("Request::handle_read_from_cache_state: Failed to create pipe: {}", request_pipe.error());
transition_to_state(State::Error);
return request_pipe.release_error();
}
m_client_request_pipe = request_pipe.release_value();
m_client.async_request_started(m_request_id, IPC::File::adopt_fd(m_client_request_pipe->reader_fd()));
return {};
}
void Request::transfer_headers_to_client_if_needed()
{
if (exchange(m_sent_response_headers_to_client, true))
@ -527,6 +563,16 @@ void Request::transfer_headers_to_client_if_needed()
ErrorOr<void> Request::write_queued_bytes_without_blocking()
{
if (!m_client_writer_notifier) {
m_client_writer_notifier = Core::Notifier::construct(m_client_request_pipe->writer_fd(), Core::NotificationType::Write);
m_client_writer_notifier->set_enabled(false);
m_client_writer_notifier->on_activation = [this] {
if (auto result = write_queued_bytes_without_blocking(); result.is_error())
dbgln("Warning: Failed to write buffered request data (it's likely the client disappeared): {}", result.error());
};
}
auto available_bytes = m_response_buffer.used_buffer_size();
// If we've received a response to a range request that is not the partial content (206) we requested, we must
@ -563,7 +609,7 @@ ErrorOr<void> Request::write_queued_bytes_without_blocking()
bytes_to_send.resize(available_bytes);
m_response_buffer.peek_some(bytes_to_send);
auto result = m_client_request_pipe.value().write(bytes_to_send);
auto result = m_client_request_pipe->write(bytes_to_send);
if (result.is_error()) {
if (result.error().code() != EAGAIN)
return result.release_error();
@ -589,6 +635,15 @@ ErrorOr<void> Request::write_queued_bytes_without_blocking()
return {};
}
ErrorOr<void> Request::revalidation_failed()
{
m_cache_entry_reader->revalidation_failed();
m_cache_entry_reader.clear();
TRY(inform_client_request_started());
return {};
}
u32 Request::acquire_status_code() const
{
long http_status_code = 0;

View file

@ -109,8 +109,10 @@ private:
static size_t on_header_received(void* buffer, size_t size, size_t nmemb, void* user_data);
static size_t on_data_received(void* buffer, size_t size, size_t nmemb, void* user_data);
ErrorOr<void> inform_client_request_started();
void transfer_headers_to_client_if_needed();
ErrorOr<void> write_queued_bytes_without_blocking();
ErrorOr<void> revalidation_failed();
u32 acquire_status_code() const;
Requests::RequestTimingInfo acquire_timing_info() const;