mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-10-19 07:33:20 +00:00

This adds a disk cache for HTTP responses received from the network. For now, we take a rather conservative approach to caching. We don't cache a response until we're 100% sure it is cacheable (there are heuristics we can implement in the future based on the absence of specific headers). The cache is broken into 2 categories of files: 1. An index file. This is a SQL database containing metadata about each cache entry (URL, timestamps, etc.). 2. Cache files. Each cached response is in its own file. The file is an amalgamation of all info needed to reconstruct an HTTP response. This includes the status code, headers, body, etc. A cache entry is created once we receive the headers for a response. The index, however, is not updated at this point. We stream the body into the cache entry as it is received. Once we've successfully cached the entire body, we create an index entry in the database. If any of these steps fail along the way, the cache entry is removed and the index is left untouched. Subsequent requests are checked for cache hits from the index. If a hit is found, we read just enough of the cache entry to inform WebContent of the status code and headers. The body of the response is piped to WebContent via syscalls, such that the transfer happens entirely in the kernel; no need to allocate the memory for the body in userspace (WebContent still allocates a buffer to hold the data, of course). If an error occurs while piping the body, we currently error out the request. There is a FIXME to switch to a network request. Cache hits are also validated for freshness before they are used. If a response has expired, we remove it and its index entry, and proceed with a network request.
130 lines
3.7 KiB
C++
130 lines
3.7 KiB
C++
/*
|
|
* Copyright (c) 2025, Tim Flynn <trflynn89@ladybird.org>
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include <AK/Error.h>
|
|
#include <AK/LexicalPath.h>
|
|
#include <AK/Optional.h>
|
|
#include <AK/String.h>
|
|
#include <AK/Types.h>
|
|
#include <LibCore/File.h>
|
|
#include <LibHTTP/HeaderMap.h>
|
|
#include <RequestServer/Forward.h>
|
|
|
|
namespace RequestServer {
|
|
|
|
struct [[gnu::packed]] CacheHeader {
|
|
static ErrorOr<CacheHeader> read_from_stream(Stream&);
|
|
ErrorOr<void> write_to_stream(Stream&) const;
|
|
|
|
static constexpr auto CACHE_MAGIC = 0xcafef00du;
|
|
static constexpr auto CACHE_VERSION = 1;
|
|
|
|
u32 magic { CACHE_MAGIC };
|
|
u32 version { CACHE_VERSION };
|
|
|
|
u32 url_size { 0 };
|
|
u32 url_hash { 0 };
|
|
|
|
u32 status_code { 0 };
|
|
u32 reason_phrase_size { 0 };
|
|
u32 reason_phrase_hash { 0 };
|
|
|
|
u32 headers_size { 0 };
|
|
u32 headers_hash { 0 };
|
|
};
|
|
|
|
struct [[gnu::packed]] CacheFooter {
|
|
static ErrorOr<CacheFooter> read_from_stream(Stream&);
|
|
ErrorOr<void> write_to_stream(Stream&) const;
|
|
|
|
u64 data_size { 0 };
|
|
u32 crc32 { 0 };
|
|
};
|
|
|
|
// A cache entry is an amalgamation of all information needed to reconstruct HTTP responses. It is created once we have
|
|
// received the response headers for a request. The body is streamed into the entry as it is received. The cache format
|
|
// on disk is:
|
|
//
|
|
// [CacheHeader][URL][ReasonPhrase][HttpHeaders][Data][CacheFooter]
|
|
class CacheEntry {
|
|
public:
|
|
virtual ~CacheEntry() = default;
|
|
|
|
void remove();
|
|
|
|
protected:
|
|
CacheEntry(DiskCache&, CacheIndex&, u64 cache_key, String url, LexicalPath, CacheHeader);
|
|
|
|
void close_and_destory_cache_entry();
|
|
|
|
DiskCache& m_disk_cache;
|
|
CacheIndex& m_index;
|
|
|
|
u64 m_cache_key { 0 };
|
|
|
|
String m_url;
|
|
LexicalPath m_path;
|
|
|
|
CacheHeader m_cache_header;
|
|
CacheFooter m_cache_footer;
|
|
};
|
|
|
|
class CacheEntryWriter : public CacheEntry {
|
|
public:
|
|
static ErrorOr<NonnullOwnPtr<CacheEntryWriter>> create(DiskCache&, CacheIndex&, u64 cache_key, String url, u32 status_code, Optional<String> reason_phrase, HTTP::HeaderMap const&, UnixDateTime request_time);
|
|
virtual ~CacheEntryWriter() override = default;
|
|
|
|
ErrorOr<void> write_data(ReadonlyBytes);
|
|
ErrorOr<void> flush();
|
|
|
|
private:
|
|
CacheEntryWriter(DiskCache&, CacheIndex&, u64 cache_key, String url, LexicalPath, NonnullOwnPtr<Core::OutputBufferedFile>, CacheHeader, UnixDateTime request_time);
|
|
|
|
NonnullOwnPtr<Core::OutputBufferedFile> m_file;
|
|
|
|
UnixDateTime m_request_time;
|
|
UnixDateTime m_response_time;
|
|
};
|
|
|
|
class CacheEntryReader : public CacheEntry {
|
|
public:
|
|
static ErrorOr<NonnullOwnPtr<CacheEntryReader>> create(DiskCache&, CacheIndex&, u64 cache_key, u64 data_size);
|
|
virtual ~CacheEntryReader() override = default;
|
|
|
|
void pipe_to(int pipe_fd, Function<void(u64 bytes_piped)> on_complete, Function<void(u64 bytes_piped)> on_error);
|
|
|
|
u32 status_code() const { return m_cache_header.status_code; }
|
|
Optional<String> const& reason_phrase() const { return m_reason_phrase; }
|
|
HTTP::HeaderMap const& headers() const { return m_headers; }
|
|
|
|
private:
|
|
CacheEntryReader(DiskCache&, CacheIndex&, u64 cache_key, String url, LexicalPath, NonnullOwnPtr<Core::File>, int fd, CacheHeader, Optional<String> reason_phrase, HTTP::HeaderMap, u64 data_offset, u64 data_size);
|
|
|
|
void pipe_without_blocking();
|
|
void pipe_complete();
|
|
|
|
ErrorOr<void> read_and_validate_footer();
|
|
|
|
NonnullOwnPtr<Core::File> m_file;
|
|
int m_fd { -1 };
|
|
|
|
RefPtr<Core::Notifier> m_pipe_write_notifier;
|
|
int m_pipe_fd { -1 };
|
|
|
|
Function<void(u64)> m_on_pipe_complete;
|
|
Function<void(u64)> m_on_pipe_error;
|
|
u64 m_bytes_piped { 0 };
|
|
|
|
Optional<String> m_reason_phrase;
|
|
HTTP::HeaderMap m_headers;
|
|
|
|
u64 const m_data_offset { 0 };
|
|
u64 const m_data_size { 0 };
|
|
};
|
|
|
|
}
|