mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2026-04-19 02:10:26 +00:00
Teach LibWebView autocomplete to query HistoryStore before falling back to remote engines and move the wiring out of the AppKit frontend. Refine matching so scheme and www. boilerplate do not dominate results, short title and substring queries stay quiet, and history tracing can explain what the ranking code is doing.
528 lines
17 KiB
C++
528 lines
17 KiB
C++
/*
 * Copyright (c) 2026-present, the Ladybird developers.
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
|
|
|
|
#include <AK/Debug.h>
|
|
#include <AK/QuickSort.h>
|
|
#include <AK/Utf8View.h>
|
|
#include <LibDatabase/Database.h>
|
|
#include <LibURL/URL.h>
|
|
#include <LibWebView/HistoryDebug.h>
|
|
#include <LibWebView/HistoryStore.h>
|
|
|
|
namespace WebView {
|
|
|
|
// Cap on how many result slots PersistedStorage::autocomplete_suggestions() reserves before running its query.
// NOTE(review): presumably also the default `limit` declared in the header — confirm.
static constexpr auto DEFAULT_AUTOCOMPLETE_SUGGESTION_LIMIT = 8uz;
|
|
// Shortest query (measured with Utf8View::length()) for which title matching and non-prefix URL matching
// are enabled; shorter queries only ever match URL prefixes.
static constexpr size_t MINIMUM_TITLE_AUTOCOMPLETE_QUERY_LENGTH = 3;
|
|
|
|
// Returns the part of `url` that follows the first "://".
// Yields an empty Optional when `url` contains no "://" at all.
static Optional<StringView> url_without_scheme(StringView url)
{
    constexpr auto separator = "://"sv;

    if (auto separator_offset = url.find(separator); separator_offset.has_value())
        return url.substring_view(separator_offset.value() + separator.length());

    return {};
}
|
|
|
|
// Reduces a URL to the text autocomplete matches against: the scheme separator (if any) and
// a leading "www." (compared case-insensitively) are removed so that boilerplate never
// dominates the match.
static StringView autocomplete_searchable_url(StringView url)
{
    constexpr auto www_prefix = "www."sv;

    auto const host_and_path = url_without_scheme(url).value_or(url);
    auto const has_www_prefix = host_and_path.starts_with(www_prefix, CaseSensitivity::CaseInsensitive);

    return has_www_prefix ? host_and_path.substring_view(www_prefix.length()) : host_and_path;
}
|
|
|
|
// Normalizes a user-typed query for URL matching.
//
// A query must receive exactly the treatment stored URLs receive in autocomplete_searchable_url()
// (scheme separator and leading "www." removed), otherwise prefix comparisons between the two
// would never line up. Delegating instead of duplicating the steps keeps both in lockstep.
static StringView autocomplete_url_query(StringView query)
{
    return autocomplete_searchable_url(query);
}
|
|
|
|
// Returns `query` when it is long enough to be matched against page titles, and an empty view
// otherwise. Length is measured with Utf8View::length() rather than as a byte count.
static StringView autocomplete_title_query(StringView query)
{
    auto const query_length = Utf8View { query }.length();
    auto const is_long_enough = query_length >= MINIMUM_TITLE_AUTOCOMPLETE_QUERY_LENGTH;

    return is_long_enough ? query : StringView {};
}
|
|
|
|
// Returns `query` when it may be used for substring (non-prefix) URL matching, and an empty
// view otherwise.
static StringView autocomplete_url_contains_query(StringView query)
{
    // Non-prefix URL matches get noisy very quickly, so only enable them
    // once the user has typed enough to disambiguate path fragments.
    auto const query_length = Utf8View { query }.length();
    auto const is_long_enough = query_length >= MINIMUM_TITLE_AUTOCOMPLETE_QUERY_LENGTH;

    return is_long_enough ? query : StringView {};
}
|
|
|
|
// Decides whether `entry` is a candidate for the given queries. An entry matches when any of
// the following holds (all comparisons are case-insensitive):
//   1. its searchable URL starts with `url_query`,
//   2. its searchable URL contains `url_query` and the query is long enough for substring matching,
//   3. its title contains `title_query`.
// An empty query disables the corresponding check.
static bool matches_query(HistoryEntry const& entry, StringView title_query, StringView url_query)
{
    auto const candidate_url = autocomplete_searchable_url(entry.url.bytes_as_string_view());

    auto const is_url_prefix_match = !url_query.is_empty()
        && candidate_url.starts_with(url_query, CaseSensitivity::CaseInsensitive);
    if (is_url_prefix_match)
        return true;

    auto const substring_query = autocomplete_url_contains_query(url_query);
    auto const is_url_substring_match = !substring_query.is_empty()
        && candidate_url.contains(substring_query, CaseSensitivity::CaseInsensitive);
    if (is_url_substring_match)
        return true;

    if (title_query.is_empty() || !entry.title.has_value())
        return false;

    return entry.title->contains(title_query, CaseSensitivity::CaseInsensitive);
}
|
|
|
|
// Grades how well `entry` matches; lower values sort first.
//   0: searchable URL equals `url_query` (ignoring ASCII case)
//   1: searchable URL starts with `url_query`
//   2: title starts with `title_query`
//   3: everything else
static u8 match_rank(HistoryEntry const& entry, StringView title_query, StringView url_query)
{
    constexpr u8 exact_url_rank = 0;
    constexpr u8 url_prefix_rank = 1;
    constexpr u8 title_prefix_rank = 2;
    constexpr u8 fallback_rank = 3;

    auto const candidate_url = autocomplete_searchable_url(entry.url.bytes_as_string_view());
    auto const has_url_query = !url_query.is_empty();

    if (has_url_query && candidate_url.equals_ignoring_ascii_case(url_query))
        return exact_url_rank;

    if (has_url_query && candidate_url.starts_with(url_query, CaseSensitivity::CaseInsensitive))
        return url_prefix_rank;

    auto const title_has_prefix = !title_query.is_empty()
        && entry.title.has_value()
        && entry.title->starts_with_bytes(title_query, CaseSensitivity::CaseInsensitive);

    return title_has_prefix ? title_prefix_rank : fallback_rank;
}
|
|
|
|
// Orders `matches` in place from best to worst suggestion: by match rank first, then by
// higher visit count, then by more recent visit, and finally by URL so that the ordering
// is total.
static void sort_matching_entries(Vector<HistoryEntry const*>& matches, StringView title_query, StringView url_query)
{
    auto sorts_before = [&](HistoryEntry const& lhs, HistoryEntry const& rhs) {
        auto const lhs_rank = match_rank(lhs, title_query, url_query);
        auto const rhs_rank = match_rank(rhs, title_query, url_query);
        if (lhs_rank != rhs_rank)
            return lhs_rank < rhs_rank;

        // More visits first.
        if (lhs.visit_count != rhs.visit_count)
            return lhs.visit_count > rhs.visit_count;

        // Most recently visited first.
        if (lhs.last_visited_time != rhs.last_visited_time)
            return lhs.last_visited_time > rhs.last_visited_time;

        return lhs.url < rhs.url;
    };

    quick_sort(matches, [&](auto const* left, auto const* right) {
        return sorts_before(*left, *right);
    });
}
|
|
|
|
// Opens a history store persisted in `database`.
//
// Makes sure the History table and its last-visited index exist, then prepares every statement
// the persisted storage uses. Any statement that fails to prepare is propagated as an error.
ErrorOr<NonnullOwnPtr<HistoryStore>> HistoryStore::create(Database::Database& database)
{
    if (auto database_path = database.database_path(); database_path.has_value())
        dbgln_if(WEBVIEW_HISTORY_DEBUG, "[History] Opening persisted history store at {}", database_path->string());
    else
        dbgln_if(WEBVIEW_HISTORY_DEBUG, "[History] Opening memory-backed persisted history store");

    Statements statements {};

    // A missing title is stored as the empty string, hence `title TEXT NOT NULL`.
    auto create_history_table = TRY(database.prepare_statement(R"#(
        CREATE TABLE IF NOT EXISTS History (
            url TEXT PRIMARY KEY,
            title TEXT NOT NULL,
            visit_count INTEGER NOT NULL,
            last_visited_time INTEGER NOT NULL
        );
    )#"sv));
    database.execute_statement(create_history_table, {});

    auto create_last_visited_index = TRY(database.prepare_statement(R"#(
        CREATE INDEX IF NOT EXISTS HistoryLastVisitedTimeIndex
        ON History(last_visited_time DESC);
    )#"sv));
    database.execute_statement(create_last_visited_index, {});

    // Inserts a first visit, or bumps the visit count and timestamp of a known URL. An empty
    // incoming title never overwrites a title that is already stored.
    statements.upsert_entry = TRY(database.prepare_statement(R"#(
        INSERT INTO History (url, title, visit_count, last_visited_time)
        VALUES (?, ?, 1, ?)
        ON CONFLICT(url) DO UPDATE SET
            title = CASE
                WHEN excluded.title != '' THEN excluded.title
                ELSE History.title
            END,
            visit_count = History.visit_count + 1,
            last_visited_time = excluded.last_visited_time;
    )#"sv));

    statements.update_title = TRY(database.prepare_statement(R"#(
        UPDATE History
        SET title = ?
        WHERE url = ?;
    )#"sv));

    statements.get_entry = TRY(database.prepare_statement(R"#(
        SELECT title, visit_count, last_visited_time
        FROM History
        WHERE url = ?;
    )#"sv));

    // The inner SELECT computes `searchable_url` the same way autocomplete_searchable_url() does:
    // everything up to and including the first "://" is dropped, then a leading "www.".
    //
    // NOTE: Prefix checks are written as `INSTR(haystack, needle) = 1` instead of
    //       `haystack LIKE needle || '%'`. The needle is user input, and LIKE would interpret any
    //       `%` or `_` in it (both are common in URLs) as wildcards, returning rows that the
    //       transient storage's starts_with() checks reject.
    //
    // The statement takes 13 placeholders, in (guard, value) pairs followed by the limit. Their
    // count and order must stay in sync with PersistedStorage::autocomplete_suggestions().
    statements.search_entries = TRY(database.prepare_statement(R"#(
        SELECT url
        FROM (
            SELECT
                url,
                title,
                visit_count,
                last_visited_time,
                CASE
                    WHEN LOWER(CASE
                            WHEN INSTR(url, '://') > 0 THEN SUBSTR(url, INSTR(url, '://') + 3)
                            ELSE url
                        END) LIKE 'www.%'
                    THEN SUBSTR(CASE
                            WHEN INSTR(url, '://') > 0 THEN SUBSTR(url, INSTR(url, '://') + 3)
                            ELSE url
                        END, 5)
                    ELSE CASE
                        WHEN INSTR(url, '://') > 0 THEN SUBSTR(url, INSTR(url, '://') + 3)
                        ELSE url
                    END
                END AS searchable_url
            FROM History
        )
        WHERE ((? != '' AND INSTR(LOWER(searchable_url), LOWER(?)) = 1)
            OR (? != '' AND INSTR(LOWER(searchable_url), LOWER(?)) > 0)
            OR (? != '' AND INSTR(LOWER(title), LOWER(?)) > 0))
        ORDER BY
            CASE
                WHEN ? != '' AND LOWER(searchable_url) = LOWER(?) THEN 0
                WHEN ? != '' AND INSTR(LOWER(searchable_url), LOWER(?)) = 1 THEN 1
                WHEN ? != '' AND INSTR(LOWER(title), LOWER(?)) = 1 THEN 2
                ELSE 3
            END,
            visit_count DESC,
            last_visited_time DESC,
            url ASC
        LIMIT ?;
    )#"sv));

    statements.clear_entries = TRY(database.prepare_statement("DELETE FROM History;"sv));
    statements.delete_entries_accessed_since = TRY(database.prepare_statement("DELETE FROM History WHERE last_visited_time >= ?;"sv));

    return adopt_own(*new HistoryStore { PersistedStorage { database, statements } });
}
|
|
|
|
// Creates a history store without persisted storage; entries are kept in the transient storage.
NonnullOwnPtr<HistoryStore> HistoryStore::create()
{
    dbgln_if(WEBVIEW_HISTORY_DEBUG, "[History] Opening transient history store");

    auto* transient_store = new HistoryStore { OptionalNone {} };
    return adopt_own(*transient_store);
}
|
|
|
|
// Creates a history store whose operations all return early without recording or reporting anything.
NonnullOwnPtr<HistoryStore> HistoryStore::create_disabled()
{
    dbgln_if(WEBVIEW_HISTORY_DEBUG, "[History] Opening disabled history store");

    constexpr bool is_disabled = true;
    auto* disabled_store = new HistoryStore { OptionalNone {}, is_disabled };
    return adopt_own(*disabled_store);
}
|
|
|
|
HistoryStore::HistoryStore(Optional<PersistedStorage> persisted_storage, bool is_disabled)
|
|
: m_persisted_storage(move(persisted_storage))
|
|
, m_is_disabled(is_disabled)
|
|
{
|
|
}
|
|
|
|
// The destructor is defaulted here, in the implementation file, rather than in the class definition.
HistoryStore::~HistoryStore() = default;
|
|
|
|
// Produces the key under which `url` is stored in history: the URL serialized without its fragment.
// Returns an empty Optional for URLs that are not recorded at all: those without a scheme,
// "about" and "data" URLs, and URLs that serialize to an empty string.
Optional<String> HistoryStore::normalize_url(URL::URL const& url)
{
    auto const& scheme = url.scheme();

    if (scheme.is_empty()) {
        dbgln_if(WEBVIEW_HISTORY_DEBUG, "[History] Skipping history entry without a scheme: {}", url);
        return {};
    }

    if (scheme.is_one_of("about"sv, "data"sv)) {
        dbgln_if(WEBVIEW_HISTORY_DEBUG, "[History] Skipping non-browsable history URL: {}", url);
        return {};
    }

    auto serialized_url = url.serialize(URL::ExcludeFragment::Yes);
    if (!serialized_url.is_empty())
        return serialized_url;

    dbgln_if(WEBVIEW_HISTORY_DEBUG, "[History] Skipping history entry with an empty normalized URL: {}", url);
    return {};
}
|
|
|
|
// Records a visit to `url` at `visited_at` in whichever storage backs this store.
// Does nothing when the store is disabled or when normalize_url() rejects the URL.
void HistoryStore::record_visit(URL::URL const& url, Optional<String> title, UnixDateTime visited_at)
{
    if (m_is_disabled)
        return;

    auto history_url = normalize_url(url);
    if (!history_url.has_value())
        return;

    dbgln_if(WEBVIEW_HISTORY_DEBUG, "[History] Recording visit in {} store: url='{}' title='{}' visited_at={}",
        m_persisted_storage.has_value() ? "SQL"sv : "transient"sv,
        *history_url,
        title.has_value() ? title->bytes_as_string_view() : "<none>"sv,
        visited_at.seconds_since_epoch());

    if (!m_persisted_storage.has_value()) {
        // The transient storage keeps the strings, so hand them over instead of copying.
        m_transient_storage.record_visit(history_url.release_value(), move(title), visited_at);
        return;
    }

    m_persisted_storage->record_visit(*history_url, title, visited_at);
}
|
|
|
|
// Replaces the stored title of `url` with `title`.
// Does nothing when the store is disabled, when `title` is empty, or when normalize_url()
// rejects the URL.
void HistoryStore::update_title(URL::URL const& url, String const& title)
{
    if (m_is_disabled)
        return;

    if (title.is_empty()) {
        dbgln_if(WEBVIEW_HISTORY_DEBUG, "[History] Ignoring empty history title update for {}", url);
        return;
    }

    auto history_url = normalize_url(url);
    if (!history_url.has_value())
        return;

    dbgln_if(WEBVIEW_HISTORY_DEBUG, "[History] Updating history title in {} store: url='{}' title='{}'",
        m_persisted_storage.has_value() ? "SQL"sv : "transient"sv,
        *history_url,
        title);

    if (!m_persisted_storage.has_value()) {
        m_transient_storage.update_title(*history_url, title);
        return;
    }

    m_persisted_storage->update_title(*history_url, title);
}
|
|
|
|
// Looks up the history entry stored for `url`.
// Returns an empty Optional when the store is disabled, when normalize_url() rejects the URL,
// or when the backing storage has no entry for it.
Optional<HistoryEntry> HistoryStore::entry_for_url(URL::URL const& url)
{
    if (m_is_disabled)
        return {};

    auto history_url = normalize_url(url);
    if (!history_url.has_value())
        return {};

    Optional<HistoryEntry> found_entry;
    if (m_persisted_storage.has_value())
        found_entry = m_persisted_storage->entry_for_url(*history_url);
    else
        found_entry = m_transient_storage.entry_for_url(*history_url);

    if (!found_entry.has_value()) {
        dbgln_if(WEBVIEW_HISTORY_DEBUG, "[History] No history entry found for '{}'", *history_url);
        return {};
    }

    dbgln_if(WEBVIEW_HISTORY_DEBUG, "[History] Found history entry for '{}': title='{}' visits={} last_visited={}",
        found_entry->url,
        found_entry->title.has_value() ? found_entry->title->bytes_as_string_view() : "<none>"sv,
        found_entry->visit_count,
        found_entry->last_visited_time.seconds_since_epoch());

    return found_entry;
}
|
|
|
|
// Returns up to `limit` history URLs matching `query`, best match first.
// The query is trimmed of surrounding whitespace; a disabled store or an empty query yields
// no suggestions. Separate title and URL queries are derived from the trimmed text and
// handed to whichever storage backs this store.
Vector<String> HistoryStore::autocomplete_suggestions(StringView query, size_t limit)
{
    if (m_is_disabled)
        return {};

    auto const trimmed_query = query.trim_whitespace();
    if (trimmed_query.is_empty()) {
        dbgln_if(WEBVIEW_HISTORY_DEBUG, "[History] History autocomplete query is empty after trimming");
        return {};
    }

    auto const title_query = autocomplete_title_query(trimmed_query);
    auto const url_query = autocomplete_url_query(trimmed_query);

    auto suggestions = [&] {
        if (m_persisted_storage.has_value())
            return m_persisted_storage->autocomplete_suggestions(title_query, url_query, limit);
        return m_transient_storage.autocomplete_suggestions(title_query, url_query, limit);
    }();

    dbgln_if(WEBVIEW_HISTORY_DEBUG, "[History] {} history autocomplete suggestions for '{}' (title_query='{}', url_query='{}', limit={}): {}",
        m_persisted_storage.has_value() ? "SQL"sv : "Transient"sv,
        trimmed_query,
        title_query,
        url_query,
        limit,
        history_log_suggestions(suggestions));

    return suggestions;
}
|
|
|
|
void HistoryStore::clear()
|
|
{
|
|
if (m_is_disabled)
|
|
return;
|
|
|
|
dbgln_if(WEBVIEW_HISTORY_DEBUG, "[History] Clearing {} history store", m_persisted_storage.has_value() ? "SQL"sv : "transient"sv);
|
|
if (m_persisted_storage.has_value())
|
|
m_persisted_storage->clear();
|
|
else
|
|
m_transient_storage.clear();
|
|
}
|
|
|
|
// Removes every entry whose last visit happened at or after `since` from whichever storage
// backs this store. No-op when the store is disabled.
void HistoryStore::remove_entries_accessed_since(UnixDateTime since)
{
    if (m_is_disabled)
        return;

    dbgln_if(WEBVIEW_HISTORY_DEBUG, "[History] Removing {} history entries accessed since {}",
        m_persisted_storage.has_value() ? "SQL"sv : "transient"sv,
        since.seconds_since_epoch());

    if (!m_persisted_storage.has_value()) {
        m_transient_storage.remove_entries_accessed_since(since);
        return;
    }

    m_persisted_storage->remove_entries_accessed_since(since);
}
|
|
|
|
// Records a visit to `url` in the in-memory map.
//
// A URL seen for the first time gets a new entry with a visit count of 1. For a known URL the
// visit count is incremented and the timestamp refreshed; its title is only replaced when the
// incoming title is non-empty.
//
// An empty title is treated as "no title" in both cases. The persisted storage stores a missing
// title as the empty string and reads an empty string back as "no title", so normalizing here
// keeps entry_for_url() results identical across the two backends.
void HistoryStore::TransientStorage::record_visit(String url, Optional<String> title, UnixDateTime visited_at)
{
    auto const has_usable_title = title.has_value() && !title->is_empty();

    if (auto existing = m_entries.find(url); existing != m_entries.end()) {
        auto& known_entry = existing->value;
        known_entry.visit_count++;
        known_entry.last_visited_time = visited_at;
        if (has_usable_title)
            known_entry.title = move(title);
        return;
    }

    auto first_visit = HistoryEntry {
        .url = url,
        .title = has_usable_title ? move(title) : Optional<String> {},
        .visit_count = 1,
        .last_visited_time = visited_at,
    };
    m_entries.set(move(url), move(first_visit));
}
|
|
|
|
// Replaces the title of the entry stored for `url`; URLs without an entry are left alone.
void HistoryStore::TransientStorage::update_title(String const& url, String title)
{
    if (auto existing = m_entries.find(url); existing != m_entries.end())
        existing->value.title = move(title);
}
|
|
|
|
// Returns a copy of the entry stored for `url`, or an empty Optional when there is none.
Optional<HistoryEntry> HistoryStore::TransientStorage::entry_for_url(String const& url)
{
    if (auto stored_entry = m_entries.get(url); stored_entry.has_value())
        return *stored_entry;

    return {};
}
|
|
|
|
// Collects every in-memory entry accepted by matches_query(), orders the candidates with
// sort_matching_entries(), and returns the URLs of the first `limit` of them.
Vector<String> HistoryStore::TransientStorage::autocomplete_suggestions(StringView title_query, StringView url_query, size_t limit)
{
    Vector<HistoryEntry const*> candidates;
    for (auto const& slot : m_entries) {
        auto const& candidate = slot.value;
        if (!matches_query(candidate, title_query, url_query))
            continue;
        candidates.append(&candidate);
    }

    sort_matching_entries(candidates, title_query, url_query);

    auto const suggestion_count = min(limit, candidates.size());

    Vector<String> suggestions;
    suggestions.ensure_capacity(suggestion_count);
    for (size_t index = 0; index < suggestion_count; ++index)
        suggestions.unchecked_append(candidates[index]->url);

    return suggestions;
}
|
|
|
|
void HistoryStore::TransientStorage::clear()
|
|
{
|
|
m_entries.clear();
|
|
}
|
|
|
|
// Drops every in-memory entry whose last visit happened at or after `since`.
void HistoryStore::TransientStorage::remove_entries_accessed_since(UnixDateTime since)
{
    m_entries.remove_all_matching([since](auto const& /* url */, auto const& stored_entry) {
        return stored_entry.last_visited_time >= since;
    });
}
|
|
|
|
// Runs the upsert statement for `url`, binding the URL, the title and the visit time in that order.
void HistoryStore::PersistedStorage::record_visit(String const& url, Optional<String> const& title, UnixDateTime visited_at)
{
    // The History.title column is NOT NULL, so a missing title is stored as the empty string.
    auto stored_title = title.value_or(String {});

    database.execute_statement(statements.upsert_entry, {}, url, stored_title, visited_at);
}
|
|
|
|
// Runs the title-update statement for `url`.
void HistoryStore::PersistedStorage::update_title(String const& url, String const& title)
{
    // Bind order follows the statement text: `SET title = ?` comes before `WHERE url = ?`.
    database.execute_statement(statements.update_title, {}, title, url);
}
|
|
|
|
// Reads the row stored for `url` and converts it into a HistoryEntry.
// Returns an empty Optional when the query yields no row.
Optional<HistoryEntry> HistoryStore::PersistedStorage::entry_for_url(String const& url)
{
    Optional<HistoryEntry> found_entry;

    auto read_row = [&](auto statement_id) {
        // Column order matches the SELECT list: title, visit_count, last_visited_time.
        auto stored_title = database.result_column<String>(statement_id, 0);
        auto visit_count = database.result_column<u64>(statement_id, 1);
        auto last_visited_time = database.result_column<UnixDateTime>(statement_id, 2);

        // An empty stored title means the page never reported one.
        Optional<String> title;
        if (!stored_title.is_empty())
            title = move(stored_title);

        found_entry = HistoryEntry {
            .url = url,
            .title = move(title),
            .visit_count = visit_count,
            .last_visited_time = last_visited_time,
        };
    };

    database.execute_statement(statements.get_entry, move(read_row), url);

    return found_entry;
}
|
|
|
|
// Runs the search statement and returns the URLs it yields, in the order the database returns them.
//
// Every condition in the statement takes its query twice (once for the "is it empty" guard, once
// for the comparison), so each string below is bound in pairs. The bind order must match the
// placeholder order of the statement prepared in HistoryStore::create().
Vector<String> HistoryStore::PersistedStorage::autocomplete_suggestions(StringView title_query, StringView url_query, size_t limit)
{
    auto to_bindable_string = [](StringView text) {
        return MUST(String::from_utf8(text));
    };

    auto const url_text = to_bindable_string(url_query);
    auto const title_text = to_bindable_string(title_query);
    auto const url_substring_text = to_bindable_string(autocomplete_url_contains_query(url_query));

    Vector<String> suggestions;
    suggestions.ensure_capacity(min(limit, DEFAULT_AUTOCOMPLETE_SUGGESTION_LIMIT));

    auto collect_url = [&](auto statement_id) {
        suggestions.append(database.result_column<String>(statement_id, 0));
    };

    database.execute_statement(
        statements.search_entries,
        move(collect_url),
        // WHERE: URL prefix, URL substring, title substring.
        url_text, url_text,
        url_substring_text, url_substring_text,
        title_text, title_text,
        // ORDER BY: exact URL, URL prefix, title prefix.
        url_text, url_text,
        url_text, url_text,
        title_text, title_text,
        // LIMIT
        static_cast<i64>(limit));

    return suggestions;
}
|
|
|
|
void HistoryStore::PersistedStorage::clear()
|
|
{
|
|
database.execute_statement(statements.clear_entries, {});
|
|
}
|
|
|
|
// Runs the statement that deletes every row whose last_visited_time is at or after `since`.
void HistoryStore::PersistedStorage::remove_entries_accessed_since(UnixDateTime since)
{
    database.execute_statement(statements.delete_entries_accessed_since, {}, since);
}
|
|
|
|
}
|