LibJS: Limit the length of strings we put in the VM string cache

For excessively long strings, we often end up spending a ton of time
hashing and comparing them, and it basically ruins the value of the
cache as an optimization.

This commit puts a cap (256 code units) on the length of strings we put
into the cache. The number is arbitrary and there may be value in tuning it.
This commit is contained in:
Andreas Kling 2025-11-29 10:56:18 +01:00 committed by Tim Flynn
parent b6ef80ff36
commit 124b4fc06a
Notes: github-actions[bot] 2025-11-29 14:40:58 +00:00

View file

@ -21,6 +21,10 @@
namespace JS {
// Strings shorter than or equal to this length are cached in the VM and deduplicated.
// Longer strings are not cached to avoid excessive hashing and lookup costs.
static constexpr size_t MAX_LENGTH_FOR_STRING_CACHE = 256;
GC_DEFINE_ALLOCATOR(PrimitiveString);
GC_DEFINE_ALLOCATOR(RopeString);
@ -29,11 +33,17 @@ GC::Ref<PrimitiveString> PrimitiveString::create(VM& vm, Utf16String const& stri
if (string.is_empty())
return vm.empty_string();
if (string.length_in_code_units() == 1) {
auto const length_in_code_units = string.length_in_code_units();
if (length_in_code_units == 1) {
if (auto code_unit = string.code_unit_at(0); is_ascii(code_unit))
return vm.single_ascii_character_string(static_cast<u8>(code_unit));
}
if (length_in_code_units > MAX_LENGTH_FOR_STRING_CACHE) {
return vm.heap().allocate<PrimitiveString>(string);
}
auto& string_cache = vm.utf16_string_cache();
if (auto it = string_cache.find(string); it != string_cache.end())
return *it->value;
@ -58,11 +68,18 @@ GC::Ref<PrimitiveString> PrimitiveString::create(VM& vm, String const& string)
if (string.is_empty())
return vm.empty_string();
if (auto bytes = string.bytes_as_string_view(); bytes.length() == 1) {
if (auto ch = static_cast<u8>(bytes[0]); is_ascii(ch))
auto const length_in_code_units = string.length_in_code_units();
if (length_in_code_units == 1) {
auto bytes = string.bytes();
if (auto ch = bytes[0]; is_ascii(ch))
return vm.single_ascii_character_string(ch);
}
if (string.length_in_code_units() > MAX_LENGTH_FOR_STRING_CACHE) {
return vm.heap().allocate<PrimitiveString>(string);
}
auto& string_cache = vm.string_cache();
if (auto it = string_cache.find(string); it != string_cache.end())
return *it->value;
@ -125,10 +142,16 @@ PrimitiveString::PrimitiveString(String string)
// Removes this string's UTF-16 and/or UTF-8 representation from the VM string caches when the
// PrimitiveString is destroyed.
PrimitiveString::~PrimitiveString()
{
// NOTE(review): this listing looks like a diff whose +/- markers were stripped. The next four
// lines appear to be the pre-change, unconditional removals that the braced blocks below
// replace -- confirm against the real file before treating both sets as live code.
if (has_utf16_string())
vm().utf16_string_cache().remove(*m_utf16_string);
if (has_utf8_string())
vm().string_cache().remove(*m_utf8_string);
// Length-gated removal: PrimitiveString::create() returns before touching the cache for strings
// longer than MAX_LENGTH_FOR_STRING_CACHE, so only strings at or below that length are removed here.
if (has_utf16_string()) {
auto const& string = *m_utf16_string;
if (string.length_in_code_units() <= MAX_LENGTH_FOR_STRING_CACHE)
vm().utf16_string_cache().remove(string);
}
// Same length gate for the UTF-8 representation and its separate cache.
if (has_utf8_string()) {
auto const& string = *m_utf8_string;
if (string.length_in_code_units() <= MAX_LENGTH_FOR_STRING_CACHE)
// NOTE(review): dereferences m_utf8_string again; the UTF-16 branch passes the local `string`.
vm().string_cache().remove(*m_utf8_string);
}
}
bool PrimitiveString::is_empty() const