diff --git a/Services/RequestServer/Cache/CacheEntry.cpp b/Services/RequestServer/Cache/CacheEntry.cpp index d857aef982f..317c6ce7b9c 100644 --- a/Services/RequestServer/Cache/CacheEntry.cpp +++ b/Services/RequestServer/Cache/CacheEntry.cpp @@ -121,7 +121,12 @@ ErrorOr CacheEntryWriter::write_status_and_reason(u32 status_code, Optiona if (!is_cacheable(status_code, response_headers)) return Error::from_string_literal("Response is not cacheable"); - if (auto freshness = calculate_freshness_lifetime(response_headers); freshness.is_negative() || freshness.is_zero()) + auto freshness_lifetime = calculate_freshness_lifetime(response_headers); + auto current_age = calculate_age(response_headers, m_request_time, m_response_time); + + // We can cache already-expired responses if there are other cache directives that allow us to revalidate the + // response on subsequent requests. For example, `Cache-Control: max-age=0, must-revalidate`. + if (cache_lifetime_status(response_headers, freshness_lifetime, current_age) == CacheLifetimeStatus::Expired) return Error::from_string_literal("Response has already expired"); TRY(m_file->write_value(m_cache_header)); @@ -240,6 +245,22 @@ CacheEntryReader::CacheEntryReader(DiskCache& disk_cache, CacheIndex& index, u64 { } +void CacheEntryReader::revalidation_succeeded(HTTP::HeaderMap const& response_headers) +{ + dbgln("\033[34;1mCache revalidation succeeded for\033[0m {}", m_url); + + update_header_fields(m_response_headers, response_headers); + m_index.update_response_headers(m_cache_key, m_response_headers); +} + +void CacheEntryReader::revalidation_failed() +{ + dbgln("\033[33;1mCache revalidation failed for\033[0m {}", m_url); + + remove(); + close_and_destroy_cache_entry(); +} + void CacheEntryReader::pipe_to(int pipe_fd, Function on_complete, Function on_error) { VERIFY(m_pipe_fd == -1); diff --git a/Services/RequestServer/Cache/CacheEntry.h b/Services/RequestServer/Cache/CacheEntry.h index 6f403762a2a..7d52d9eb8f6 100644 --- a/Services/RequestServer/Cache/CacheEntry.h +++ b/Services/RequestServer/Cache/CacheEntry.h @@ -100,6 +100,12 @@ public: static ErrorOr> create(DiskCache&, CacheIndex&, u64 cache_key, HTTP::HeaderMap, u64 data_size); virtual ~CacheEntryReader() override = default; + bool must_revalidate() const { return m_must_revalidate; } + void set_must_revalidate() { m_must_revalidate = true; } + + void revalidation_succeeded(HTTP::HeaderMap const&); + void revalidation_failed(); + void pipe_to(int pipe_fd, Function on_complete, Function on_error); u32 status_code() const { return m_cache_header.status_code; } @@ -128,6 +134,8 @@ private: Optional m_reason_phrase; HTTP::HeaderMap m_response_headers; + bool m_must_revalidate { false }; + u64 const m_data_offset { 0 }; u64 const m_data_size { 0 }; }; diff --git a/Services/RequestServer/Cache/CacheIndex.cpp b/Services/RequestServer/Cache/CacheIndex.cpp index f57ae857fce..bd1639d015d 100644 --- a/Services/RequestServer/Cache/CacheIndex.cpp +++ b/Services/RequestServer/Cache/CacheIndex.cpp @@ -99,6 +99,7 @@ ErrorOr CacheIndex::create(Database::Database& database) statements.remove_entry = TRY(database.prepare_statement("DELETE FROM CacheIndex WHERE cache_key = ?;"sv)); statements.remove_all_entries = TRY(database.prepare_statement("DELETE FROM CacheIndex;"sv)); statements.select_entry = TRY(database.prepare_statement("SELECT * FROM CacheIndex WHERE cache_key = ?;"sv)); + statements.update_response_headers = TRY(database.prepare_statement("UPDATE CacheIndex SET response_headers = ? WHERE cache_key = ?;"sv)); statements.update_last_access_time = TRY(database.prepare_statement("UPDATE CacheIndex SET last_access_time = ? WHERE cache_key = ?;"sv)); return CacheIndex { database, statements }; @@ -140,6 +141,16 @@ void CacheIndex::remove_all_entries() m_entries.clear(); } +void CacheIndex::update_response_headers(u64 cache_key, HTTP::HeaderMap response_headers) +{ + auto entry = m_entries.get(cache_key); + if (!entry.has_value()) + return; + + m_database.execute_statement(m_statements.update_response_headers, {}, serialize_headers(response_headers), cache_key); + entry->response_headers = move(response_headers); +} + void CacheIndex::update_last_access_time(u64 cache_key) { auto entry = m_entries.get(cache_key); diff --git a/Services/RequestServer/Cache/CacheIndex.h b/Services/RequestServer/Cache/CacheIndex.h index d627db4d62a..58fcd20bbd2 100644 --- a/Services/RequestServer/Cache/CacheIndex.h +++ b/Services/RequestServer/Cache/CacheIndex.h @@ -39,6 +39,7 @@ public: Optional find_entry(u64 cache_key); + void update_response_headers(u64 cache_key, HTTP::HeaderMap); void update_last_access_time(u64 cache_key); private: @@ -47,6 +48,7 @@ private: Database::StatementID remove_entry { 0 }; Database::StatementID remove_all_entries { 0 }; Database::StatementID select_entry { 0 }; + Database::StatementID update_response_headers { 0 }; Database::StatementID update_last_access_time { 0 }; }; diff --git a/Services/RequestServer/Cache/DiskCache.cpp b/Services/RequestServer/Cache/DiskCache.cpp index f5704a2afbc..dae16d55def 100644 --- a/Services/RequestServer/Cache/DiskCache.cpp +++ b/Services/RequestServer/Cache/DiskCache.cpp @@ -83,17 +83,30 @@ Variant, DiskCache::CacheHasOpenEntry> DiskCache::op return Optional {}; } - auto freshness_lifetime = calculate_freshness_lifetime(cache_entry.value()->response_headers()); - auto current_age = calculate_age(cache_entry.value()->response_headers(), index_entry->request_time, index_entry->response_time); + auto const& response_headers = cache_entry.value()->response_headers(); + auto freshness_lifetime = calculate_freshness_lifetime(response_headers); + auto current_age = calculate_age(response_headers, index_entry->request_time, index_entry->response_time); - if (!is_response_fresh(freshness_lifetime, current_age)) { + switch (cache_lifetime_status(response_headers, freshness_lifetime, current_age)) { + case CacheLifetimeStatus::Fresh: + dbgln("\033[32;1mOpened disk cache entry for\033[0m {} (lifetime={}s age={}s) ({} bytes)", request.url(), freshness_lifetime.to_seconds(), current_age.to_seconds(), index_entry->data_size); + break; + + case CacheLifetimeStatus::Expired: dbgln("\033[33;1mCache entry expired for\033[0m {} (lifetime={}s age={}s)", request.url(), freshness_lifetime.to_seconds(), current_age.to_seconds()); cache_entry.value()->remove(); return Optional {}; - } - dbgln("\033[32;1mOpened disk cache entry for\033[0m {} (lifetime={}s age={}s) ({} bytes)", request.url(), freshness_lifetime.to_seconds(), current_age.to_seconds(), index_entry->data_size); + case CacheLifetimeStatus::MustRevalidate: + // We will hold an exclusive lock on the cache entry for revalidation requests. + if (check_if_cache_has_open_entry(request, cache_key, CheckReaderEntries::Yes)) + return Optional {}; + + dbgln("\033[36;1mMust revalidate disk cache entry for\033[0m {} (lifetime={}s age={}s)", request.url(), freshness_lifetime.to_seconds(), current_age.to_seconds()); + cache_entry.value()->set_must_revalidate(); + break; + } auto* cache_entry_pointer = cache_entry.value().ptr(); m_open_cache_entries.ensure(cache_key).append(cache_entry.release_value()); @@ -113,14 +126,17 @@ bool DiskCache::check_if_cache_has_open_entry(Request& request, u64 cache_key, C m_requests_waiting_completion.ensure(cache_key).append(request); return true; } + + // We allow concurrent readers unless another reader is open for revalidation. That reader will issue the network + // request, which may then result in the cache entry being updated or deleted. + if (check_reader_entries == CheckReaderEntries::Yes || as(*open_entry).must_revalidate()) { + dbgln("\033[36;1mDeferring disk cache entry for\033[0m {} (waiting for existing reader)", request.url()); + m_requests_waiting_completion.ensure(cache_key).append(request); + return true; + } } - if (check_reader_entries == CheckReaderEntries::No) - return false; - - dbgln("\033[36;1mDeferring disk cache entry for\033[0m {} (waiting for existing reader)", request.url()); - m_requests_waiting_completion.ensure(cache_key).append(request); - return true; + return false; } void DiskCache::clear_cache() diff --git a/Services/RequestServer/Cache/Utilities.cpp b/Services/RequestServer/Cache/Utilities.cpp index 42862bbe13f..93dd2c875d8 100644 --- a/Services/RequestServer/Cache/Utilities.cpp +++ b/Services/RequestServer/Cache/Utilities.cpp @@ -114,12 +114,6 @@ bool is_cacheable(u32 status_code, HTTP::HeaderMap const& headers) // - a cache extension that allows it to be cached (see Section 5.2.3); or // - a status code that is defined as heuristically cacheable (see Section 4.2.2). - // FIXME: Implement cache revalidation. - if (cache_control->contains("no-cache"sv, CaseSensitivity::CaseInsensitive)) - return false; - if (cache_control->contains("revalidate"sv, CaseSensitivity::CaseInsensitive)) - return false; - return true; } @@ -216,10 +210,84 @@ AK::Duration calculate_age(HTTP::HeaderMap const& headers, UnixDateTime request_ return AK::Duration::from_seconds(current_age); } -// https://httpwg.org/specs/rfc9111.html#expiration.model -bool is_response_fresh(AK::Duration freshness_lifetime, AK::Duration current_age) +CacheLifetimeStatus cache_lifetime_status(HTTP::HeaderMap const& headers, AK::Duration freshness_lifetime, AK::Duration current_age) { - return freshness_lifetime > current_age; + auto revalidation_status = [&]() { + // In order to revalidate a cache entry, we must have one of these headers to attach to the revalidation request. + if (headers.contains("Last-Modified"sv) || headers.contains("ETag"sv)) + return CacheLifetimeStatus::MustRevalidate; + return CacheLifetimeStatus::Expired; + }; + + auto cache_control = headers.get("Cache-Control"sv); + + // https://httpwg.org/specs/rfc9111.html#cache-response-directive.no-cache + // The no-cache response directive, in its unqualified form (without an argument), indicates that the response MUST + // NOT be used to satisfy any other request without forwarding it for validation and receiving a successful response + // + // FIXME: Handle the qualified form of the no-cache directive, which may allow us to re-use the response. + if (cache_control.has_value() && cache_control->contains("no-cache"sv, CaseSensitivity::CaseInsensitive)) + return revalidation_status(); + + // https://httpwg.org/specs/rfc9111.html#expiration.model + if (freshness_lifetime > current_age) + return CacheLifetimeStatus::Fresh; + + if (cache_control.has_value()) { + // https://httpwg.org/specs/rfc9111.html#cache-response-directive.must-revalidate + // The must-revalidate response directive indicates that once the response has become stale, a cache MUST NOT + // reuse that response to satisfy another request until it has been successfully validated by the origin + if (cache_control->contains("must-revalidate"sv, CaseSensitivity::CaseInsensitive)) + return revalidation_status(); + + // FIXME: Implement stale-while-revalidate. + } + + return CacheLifetimeStatus::Expired; +} + +// https://httpwg.org/specs/rfc9111.html#validation.sent +RevalidationAttributes RevalidationAttributes::create(HTTP::HeaderMap const& headers) +{ + RevalidationAttributes attributes; + attributes.etag = headers.get("ETag"sv).map([](auto const& etag) { return etag; }); + attributes.last_modified = parse_http_date(headers.get("Last-Modified"sv)); + + return attributes; +} + +// https://httpwg.org/specs/rfc9111.html#update +void update_header_fields(HTTP::HeaderMap& stored_headers, HTTP::HeaderMap const& updated_headers) +{ + // Caches are required to update a stored response's header fields from another (typically newer) response in + // several situations; for example, see Sections 3.4, 4.3.4, and 4.3.5. + + // When doing so, the cache MUST add each header field in the provided response to the stored response, replacing + // field values that are already present, with the following exceptions: + auto is_header_exempted_from_update = [](StringView name) { + // * Header fields excepted from storage in Section 3.1, + if (is_header_exempted_from_storage(name)) + return true; + + // * Header fields that the cache's stored response depends upon, as described below, + // * Header fields that are automatically processed and removed by the recipient, as described below, and + + // * The Content-Length header field. + if (name.equals_ignoring_ascii_case("Content-Type"sv)) + return true; + + return false; + }; + + for (auto const& updated_header : updated_headers.headers()) { + if (!is_header_exempted_from_update(updated_header.name)) + stored_headers.remove(updated_header.name); + } + + for (auto const& updated_header : updated_headers.headers()) { + if (!is_header_exempted_from_update(updated_header.name)) + stored_headers.set(updated_header.name, updated_header.value); + } } } diff --git a/Services/RequestServer/Cache/Utilities.h b/Services/RequestServer/Cache/Utilities.h index 4e8e8738b4f..87376245510 100644 --- a/Services/RequestServer/Cache/Utilities.h +++ b/Services/RequestServer/Cache/Utilities.h @@ -23,6 +23,21 @@ bool is_header_exempted_from_storage(StringView name); AK::Duration calculate_freshness_lifetime(HTTP::HeaderMap const&); AK::Duration calculate_age(HTTP::HeaderMap const&, UnixDateTime request_time, UnixDateTime response_time); -bool is_response_fresh(AK::Duration freshness_lifetime, AK::Duration current_age); + +enum class CacheLifetimeStatus { + Fresh, + Expired, + MustRevalidate, +}; +CacheLifetimeStatus cache_lifetime_status(HTTP::HeaderMap const&, AK::Duration freshness_lifetime, AK::Duration current_age); + +struct RevalidationAttributes { + static RevalidationAttributes create(HTTP::HeaderMap const&); + + Optional etag; + Optional last_modified; +}; + +void update_header_fields(HTTP::HeaderMap&, HTTP::HeaderMap const&); } diff --git a/Services/RequestServer/Request.cpp b/Services/RequestServer/Request.cpp index 961f2dc133b..65e2baab3f3 100644 --- a/Services/RequestServer/Request.cpp +++ b/Services/RequestServer/Request.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -129,6 +130,19 @@ void Request::notify_request_unblocked(Badge) void Request::notify_fetch_complete(Badge, int result_code) { + if (m_cache_entry_reader.has_value() && m_cache_entry_reader->must_revalidate()) { + if (acquire_status_code() == 304) { + m_cache_entry_reader->revalidation_succeeded(m_response_headers); + transition_to_state(State::ReadCache); + return; + } + + if (revalidation_failed().is_error()) + return; + + transfer_headers_to_client_if_needed(); + } + m_curl_result_code = result_code; if (m_response_buffer.is_eof()) @@ -177,8 +191,13 @@ void Request::handle_initial_state() m_disk_cache->open_entry(*this).visit( [&](Optional cache_entry_reader) { m_cache_entry_reader = cache_entry_reader; - if (m_cache_entry_reader.has_value()) - transition_to_state(State::ReadCache); + + if (m_cache_entry_reader.has_value()) { + if (m_cache_entry_reader->must_revalidate()) + transition_to_state(State::DNSLookup); + else + transition_to_state(State::ReadCache); + } }, [&](DiskCache::CacheHasOpenEntry) { // If an existing entry is open for writing, we must wait for it to complete. @@ -208,31 +227,18 @@ void Request::handle_initial_state() void Request::handle_read_cache_state() { -#if defined(AK_OS_WINDOWS) - dbgln("FIXME: Request::handle_read_from_cache_state: Not implemented on Windows"); - transition_to_state(State::Error); -#else m_status_code = m_cache_entry_reader->status_code(); m_reason_phrase = m_cache_entry_reader->reason_phrase(); m_response_headers = m_cache_entry_reader->response_headers(); - auto pipe_or_error = RequestPipe::create(); - if (pipe_or_error.is_error()) { - dbgln("Request::handle_read_from_cache_state: Failed to create pipe: {}", pipe_or_error.error()); - transition_to_state(State::Error); + if (inform_client_request_started().is_error()) return; - } - - auto pipe = pipe_or_error.release_value(); - - m_client.async_request_started(m_request_id, IPC::File::adopt_fd(pipe.reader_fd())); - m_client_request_pipe = move(pipe); m_client.async_headers_became_available(m_request_id, m_response_headers, m_status_code, m_reason_phrase); m_sent_response_headers_to_client = true; m_cache_entry_reader->pipe_to( - m_client_request_pipe.value().writer_fd(), + m_client_request_pipe->writer_fd(), [this](auto bytes_sent) { m_bytes_transferred_to_client = bytes_sent; m_curl_result_code = CURLE_OK; @@ -246,7 +252,6 @@ void Request::handle_read_cache_state() transition_to_state(State::DNSLookup); }); -#endif } void Request::handle_dns_lookup_state() @@ -308,28 +313,13 @@ void Request::handle_fetch_state() return; } - if (!m_start_offset_of_response_resumed_from_cache.has_value()) { - auto pipe_or_error = RequestPipe::create(); - if (pipe_or_error.is_error()) { - dbgln("Request::handle_start_fetch_state: Failed to create pipe: {}", pipe_or_error.error()); - transition_to_state(State::Error); + auto is_revalidation_request = m_cache_entry_reader.has_value() && m_cache_entry_reader->must_revalidate(); + + if (!m_start_offset_of_response_resumed_from_cache.has_value() && !is_revalidation_request) { + if (inform_client_request_started().is_error()) return; - } - - auto pipe = pipe_or_error.release_value(); - - m_client.async_request_started(m_request_id, IPC::File::adopt_fd(pipe.reader_fd())); - m_client_request_pipe = move(pipe); } - m_client_writer_notifier = Core::Notifier::construct(m_client_request_pipe.value().writer_fd(), Core::NotificationType::Write); - m_client_writer_notifier->set_enabled(false); - - m_client_writer_notifier->on_activation = [this] { - if (auto result = write_queued_bytes_without_blocking(); result.is_error()) - dbgln("Warning: Failed to write buffered request data (it's likely the client disappeared): {}", result.error()); - }; - auto set_option = [&](auto option, auto value) { if (auto result = curl_easy_setopt(m_curl_easy_handle, option, value); result != CURLE_OK) dbgln("Request::handle_start_fetch_state: Failed to set curl option: {}", curl_easy_strerror(result)); @@ -383,16 +373,30 @@ void Request::handle_fetch_state() } } + if (is_revalidation_request) { + auto revalidation_attributes = RevalidationAttributes::create(m_cache_entry_reader->response_headers()); + VERIFY(revalidation_attributes.etag.has_value() || revalidation_attributes.last_modified.has_value()); + + if (revalidation_attributes.etag.has_value()) { + // There is no CURLOPT for If-None-Match, so we must set the header value directly. + auto header_string = ByteString::formatted("If-None-Match: {}", *revalidation_attributes.etag); + curl_headers = curl_slist_append(curl_headers, header_string.characters()); + } + + if (revalidation_attributes.last_modified.has_value()) { + set_option(CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE); + set_option(CURLOPT_TIMEVALUE, revalidation_attributes.last_modified->seconds_since_epoch()); + } + } else if (m_start_offset_of_response_resumed_from_cache.has_value()) { + auto range = ByteString::formatted("{}-", *m_start_offset_of_response_resumed_from_cache); + set_option(CURLOPT_RANGE, range.characters()); + } + if (curl_headers) { set_option(CURLOPT_HTTPHEADER, curl_headers); m_curl_string_lists.append(curl_headers); } - if (m_start_offset_of_response_resumed_from_cache.has_value()) { - auto range = ByteString::formatted("{}-", *m_start_offset_of_response_resumed_from_cache); - set_option(CURLOPT_RANGE, range.characters()); - } - // FIXME: Set up proxy if applicable (void)m_proxy_data; @@ -493,6 +497,23 @@ size_t Request::on_header_received(void* buffer, size_t size, size_t nmemb, void size_t Request::on_data_received(void* buffer, size_t size, size_t nmemb, void* user_data) { auto& request = *static_cast(user_data); + + if (request.m_cache_entry_reader.has_value() && request.m_cache_entry_reader->must_revalidate()) { + // If we arrive here, we did not receive an HTTP 304 response code. We must remove the cache entry and inform + // the client of the new response headers and data. + if (request.revalidation_failed().is_error()) + return CURL_WRITEFUNC_ERROR; + + request.m_disk_cache->create_entry(request).visit( + [&](Optional cache_entry_writer) { + request.m_cache_entry_writer = cache_entry_writer; + }, + [&](DiskCache::CacheHasOpenEntry) { + // This should not be reachable, as cache revalidation holds an exclusive lock on the cache entry. + VERIFY_NOT_REACHED(); + }); + } + request.transfer_headers_to_client_if_needed(); auto total_size = size * nmemb; @@ -511,6 +532,21 @@ size_t Request::on_data_received(void* buffer, size_t size, size_t nmemb, void* return total_size; } +ErrorOr Request::inform_client_request_started() +{ + auto request_pipe = RequestPipe::create(); + if (request_pipe.is_error()) { + dbgln("Request::handle_read_from_cache_state: Failed to create pipe: {}", request_pipe.error()); + transition_to_state(State::Error); + return request_pipe.release_error(); + } + + m_client_request_pipe = request_pipe.release_value(); + m_client.async_request_started(m_request_id, IPC::File::adopt_fd(m_client_request_pipe->reader_fd())); + + return {}; +} + void Request::transfer_headers_to_client_if_needed() { if (exchange(m_sent_response_headers_to_client, true)) @@ -527,6 +563,16 @@ void Request::transfer_headers_to_client_if_needed() ErrorOr Request::write_queued_bytes_without_blocking() { + if (!m_client_writer_notifier) { + m_client_writer_notifier = Core::Notifier::construct(m_client_request_pipe->writer_fd(), Core::NotificationType::Write); + m_client_writer_notifier->set_enabled(false); + + m_client_writer_notifier->on_activation = [this] { + if (auto result = write_queued_bytes_without_blocking(); result.is_error()) + dbgln("Warning: Failed to write buffered request data (it's likely the client disappeared): {}", result.error()); + }; + } + auto available_bytes = m_response_buffer.used_buffer_size(); // If we've received a response to a range request that is not the partial content (206) we requested, we must @@ -563,7 +609,7 @@ ErrorOr Request::write_queued_bytes_without_blocking() bytes_to_send.resize(available_bytes); m_response_buffer.peek_some(bytes_to_send); - auto result = m_client_request_pipe.value().write(bytes_to_send); + auto result = m_client_request_pipe->write(bytes_to_send); if (result.is_error()) { if (result.error().code() != EAGAIN) return result.release_error(); @@ -589,6 +635,15 @@ ErrorOr Request::write_queued_bytes_without_blocking() return {}; } +ErrorOr Request::revalidation_failed() +{ + m_cache_entry_reader->revalidation_failed(); + m_cache_entry_reader.clear(); + + TRY(inform_client_request_started()); + return {}; +} + u32 Request::acquire_status_code() const { long http_status_code = 0; diff --git a/Services/RequestServer/Request.h b/Services/RequestServer/Request.h index ba1db47438c..7e178c33a60 100644 --- a/Services/RequestServer/Request.h +++ b/Services/RequestServer/Request.h @@ -109,8 +109,10 @@ private: static size_t on_header_received(void* buffer, size_t size, size_t nmemb, void* user_data); static size_t on_data_received(void* buffer, size_t size, size_t nmemb, void* user_data); + ErrorOr inform_client_request_started(); void transfer_headers_to_client_if_needed(); ErrorOr write_queued_bytes_without_blocking(); + ErrorOr revalidation_failed(); u32 acquire_status_code() const; Requests::RequestTimingInfo acquire_timing_info() const;