mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-12-07 21:59:54 +00:00
LibWeb: Throw out decoded UTF-32 data in HTMLTokenizer after parser runs
This ends up saving quite a bit of memory on many pages, since UTF-32 uses 4 bytes per code point. As an example, it reduces the memory footprint on https://gymgrossisten.com/ by 2 MiB.
This commit is contained in:
parent
b10f2993b3
commit
3593c3b687
Notes:
github-actions[bot]
2025-10-24 06:54:24 +00:00
Author: https://github.com/awesomekling
Commit: 3593c3b687
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/6561
3 changed files with 18 additions and 0 deletions
|
|
@ -259,6 +259,8 @@ void HTMLParser::run(HTMLTokenizer::StopAtInsertionPoint stop_at_insertion_point
|
||||||
}
|
}
|
||||||
|
|
||||||
flush_character_insertions();
|
flush_character_insertions();
|
||||||
|
|
||||||
|
m_tokenizer.parser_did_run({});
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLParser::run(URL::URL const& url, HTMLTokenizer::StopAtInsertionPoint stop_at_insertion_point)
|
void HTMLParser::run(URL::URL const& url, HTMLTokenizer::StopAtInsertionPoint stop_at_insertion_point)
|
||||||
|
|
|
||||||
|
|
@ -2895,6 +2895,20 @@ HTMLTokenizer::HTMLTokenizer(StringView input, ByteString const& encoding)
|
||||||
m_source_positions.empend(0u, 0u);
|
m_source_positions.empend(0u, 0u);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Invoked by HTMLParser (enforced via the Badge parameter) after a parser run has finished.
void HTMLTokenizer::parser_did_run(Badge<HTMLParser>)
{
    // OPTIMIZATION: Once every decoded code point has been consumed and neither the current
    //               nor the old insertion point refers to a position past the start of the
    //               buffer, the decoded input buffer can be dropped to save memory.
    bool const has_consumed_all_input = m_current_offset > 0
        && static_cast<size_t>(m_current_offset) == m_decoded_input.size();
    if (!has_consumed_all_input)
        return;

    // An insertion point that is set to anything other than 0 still refers into the buffer.
    if (m_insertion_point.has_value() && *m_insertion_point != 0)
        return;
    if (m_old_insertion_point.has_value() && *m_old_insertion_point != 0)
        return;

    // Drop the buffer and rewind both offsets so they stay consistent with the now-empty input.
    m_decoded_input.clear();
    m_prev_offset = 0;
    m_current_offset = 0;
}
|
||||||
|
|
||||||
void HTMLTokenizer::insert_input_at_insertion_point(StringView input)
|
void HTMLTokenizer::insert_input_at_insertion_point(StringView input)
|
||||||
{
|
{
|
||||||
Vector<u32> new_decoded_input;
|
Vector<u32> new_decoded_input;
|
||||||
|
|
|
||||||
|
|
@ -145,6 +145,8 @@ public:
|
||||||
// This permanently cuts off the tokenizer input stream.
|
// This permanently cuts off the tokenizer input stream.
|
||||||
void abort() { m_aborted = true; }
|
void abort() { m_aborted = true; }
|
||||||
|
|
||||||
|
// Called by HTMLParser at the end of a parser run. If all decoded input has been consumed
// and no insertion point (current or old) is set to a non-zero position, the decoded input
// buffer is cleared and the current/previous offsets are reset to 0 to save memory.
void parser_did_run(Badge<HTMLParser>);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void skip(size_t count);
|
void skip(size_t count);
|
||||||
Optional<u32> next_code_point(StopAtInsertionPoint);
|
Optional<u32> next_code_point(StopAtInsertionPoint);
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue