LibWeb: Throw out decoded UTF-32 data in HTMLTokenizer after parser runs

This ends up saving quite a bit of memory on many pages, since UTF-32
uses 4 bytes per code point.

As an example, it reduces the footprint on https://gymgrossisten.com/
by 2 MiB.
This commit is contained in:
Andreas Kling 2025-10-23 21:45:00 +02:00 committed by Andreas Kling
parent b10f2993b3
commit 3593c3b687
Notes: github-actions[bot] 2025-10-24 06:54:24 +00:00
3 changed files with 18 additions and 0 deletions

View file

@ -259,6 +259,8 @@ void HTMLParser::run(HTMLTokenizer::StopAtInsertionPoint stop_at_insertion_point
} }
flush_character_insertions(); flush_character_insertions();
m_tokenizer.parser_did_run({});
} }
void HTMLParser::run(URL::URL const& url, HTMLTokenizer::StopAtInsertionPoint stop_at_insertion_point) void HTMLParser::run(URL::URL const& url, HTMLTokenizer::StopAtInsertionPoint stop_at_insertion_point)

View file

@ -2895,6 +2895,20 @@ HTMLTokenizer::HTMLTokenizer(StringView input, ByteString const& encoding)
m_source_positions.empend(0u, 0u); m_source_positions.empend(0u, 0u);
} }
// Hook invoked by HTMLParser once a parser run has finished.
void HTMLTokenizer::parser_did_run(Badge<HTMLParser>)
{
    // OPTIMIZATION: Once every decoded code point has been consumed, and neither the current
    //               nor the old insertion point refers to anything past the start of the
    //               buffer, the decoded input can be thrown away to save memory.

    // Nothing to release unless we have advanced through the entire (non-empty) buffer.
    bool const consumed_all_input = m_current_offset > 0
        && static_cast<size_t>(m_current_offset) == m_decoded_input.size();
    if (!consumed_all_input)
        return;

    // An insertion point is harmless here only when it is unset or sits at offset 0.
    auto const unset_or_at_start = [](auto const& insertion_point) {
        return !insertion_point.has_value() || *insertion_point == 0;
    };
    if (!unset_or_at_start(m_insertion_point))
        return;
    if (!unset_or_at_start(m_old_insertion_point))
        return;

    // Drop the buffer and rewind both offsets so they stay valid for the now-empty input.
    m_decoded_input.clear();
    m_current_offset = 0;
    m_prev_offset = 0;
}
void HTMLTokenizer::insert_input_at_insertion_point(StringView input) void HTMLTokenizer::insert_input_at_insertion_point(StringView input)
{ {
Vector<u32> new_decoded_input; Vector<u32> new_decoded_input;

View file

@ -145,6 +145,8 @@ public:
// This permanently cuts off the tokenizer input stream. // This permanently cuts off the tokenizer input stream.
void abort() { m_aborted = true; } void abort() { m_aborted = true; }
// Called by HTMLParser at the end of a run; may discard the decoded input buffer once all of it has been consumed.
void parser_did_run(Badge<HTMLParser>);
private: private:
void skip(size_t count); void skip(size_t count);
Optional<u32> next_code_point(StopAtInsertionPoint); Optional<u32> next_code_point(StopAtInsertionPoint);