mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2026-04-18 18:00:31 +00:00
HTMLParser::the_end() had three spin_until calls that blocked the event loop: step 5 (deferred scripts), step 7 (ASAP scripts), and step 8 (load event delay). This replaces them with an HTMLParserEndState state machine that progresses asynchronously via callbacks.

The state machine has three phases matching the three spin_until calls:
- WaitingForDeferredScripts: loops executing ready deferred scripts
- WaitingForASAPScripts: waits for ASAP script lists to empty
- WaitingForLoadEventDelay: waits for nothing to delay the load event

Notification triggers re-evaluate the state machine when conditions change: HTMLScriptElement::mark_as_ready, stylesheet unblocking in StyleElementBase/HTMLLinkElement, did_stop_being_active_document, and DocumentLoadEventDelayer decrements. NavigableContainer state changes (session history readiness, content navigable cleared, lazy load flag) also trigger re-evaluation of the load event delay check.

Key design decisions and why:

1. Microtask checkpoint in schedule_progress_check(): The old spin_until called perform_a_microtask_checkpoint() before checking conditions. This is critical because HTMLImageElement::update_the_image_data step 8 queues a microtask that creates the DocumentLoadEventDelayer. Without the checkpoint, check_progress() would see zero delayers and complete before images start delaying the load event.

2. deferred_invoke in schedule_progress_check(): I tried Core::Timer (0ms), queue_global_task, and synchronous calls. Timers caused non-deterministic ordering with the HTML event loop's task processing timer, leading to image layout tests failing (wrong subtest pass/fail patterns). Synchronous calls fired too early during image load processing, before dimensions were set, causing 0-height images in layout tests. queue_global_task had task ordering issues with the session history traversal queue. deferred_invoke runs after the current callback returns but within the same event loop pump, giving the right balance.

3. Navigation load event guard (m_navigation_load_event_guard): During cross-document navigation, finalize_a_cross_document_navigation step 2 calls set_delaying_load_events(false) before the session history traversal activates the new document. This creates a transient state where the parent's load event delay check sees the about:blank document (which has ready_for_post_load_tasks=true) as the active document and completes prematurely.
258 lines
9.3 KiB
C++
258 lines
9.3 KiB
C++
/*
 * Copyright (c) 2020-2022, Andreas Kling <andreas@ladybird.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
|
|
|
|
#pragma once
|
|
|
|
#include <LibGfx/Color.h>
|
|
#include <LibJS/Heap/Cell.h>
|
|
#include <LibWeb/DOM/FragmentSerializationMode.h>
|
|
#include <LibWeb/Export.h>
|
|
#include <LibWeb/HTML/Parser/HTMLTokenizer.h>
|
|
#include <LibWeb/HTML/Parser/ListOfActiveFormattingElements.h>
|
|
#include <LibWeb/HTML/Parser/StackOfOpenElements.h>
|
|
#include <LibWeb/MimeSniff/MimeType.h>
|
|
#include <LibWeb/Platform/Timer.h>
|
|
|
|
namespace Web::HTML {
|
|
|
|
// X-macro listing every tree-construction insertion mode, in declaration order.
// To use it, define __ENUMERATE_INSERTION_MODE(mode) to the desired expansion,
// expand ENUMERATE_INSERTION_MODES, then #undef __ENUMERATE_INSERTION_MODE again.
// HTMLParser::InsertionMode is generated from this list, so the order of the
// entries determines the enumerator values.
//
// NOTE: every line except the last must end in a line-continuation backslash with
// nothing after it; any stray line between entries truncates the macro.
#define ENUMERATE_INSERTION_MODES \
    __ENUMERATE_INSERTION_MODE(Initial) \
    __ENUMERATE_INSERTION_MODE(BeforeHTML) \
    __ENUMERATE_INSERTION_MODE(BeforeHead) \
    __ENUMERATE_INSERTION_MODE(InHead) \
    __ENUMERATE_INSERTION_MODE(InHeadNoscript) \
    __ENUMERATE_INSERTION_MODE(AfterHead) \
    __ENUMERATE_INSERTION_MODE(InBody) \
    __ENUMERATE_INSERTION_MODE(Text) \
    __ENUMERATE_INSERTION_MODE(InTable) \
    __ENUMERATE_INSERTION_MODE(InTableText) \
    __ENUMERATE_INSERTION_MODE(InCaption) \
    __ENUMERATE_INSERTION_MODE(InColumnGroup) \
    __ENUMERATE_INSERTION_MODE(InTableBody) \
    __ENUMERATE_INSERTION_MODE(InRow) \
    __ENUMERATE_INSERTION_MODE(InCell) \
    __ENUMERATE_INSERTION_MODE(InTemplate) \
    __ENUMERATE_INSERTION_MODE(AfterBody) \
    __ENUMERATE_INSERTION_MODE(InFrameset) \
    __ENUMERATE_INSERTION_MODE(AfterFrameset) \
    __ENUMERATE_INSERTION_MODE(AfterAfterBody) \
    __ENUMERATE_INSERTION_MODE(AfterAfterFrameset)
|
|
|
|
// GC-allocated HTML parser. Holds the tokenizer together with the tree-construction
// state declared below: the current/original insertion mode, the stack of open
// elements, the stack of template insertion modes, the list of active formatting
// elements, and assorted parser flags.
//
// Instances are obtained through the static create_* factories; the constructors
// are private.
class WEB_API HTMLParser final : public JS::Cell {
    GC_CELL(HTMLParser, JS::Cell);
    GC_DECLARE_ALLOCATOR(HTMLParser);

    // The tokenizer is granted access to the parser's private members.
    friend class HTMLTokenizer;

public:
    ~HTMLParser();

    // Factories. NOTE(review): the exact differences between them (e.g. how the
    // encoding is determined for the "uncertain encoding" variant) live in the
    // implementation file and are not visible from this header.
    static GC::Ref<HTMLParser> create_for_scripting(DOM::Document&);
    static GC::Ref<HTMLParser> create_with_uncertain_encoding(DOM::Document&, ByteBuffer const& input, Optional<MimeSniff::MimeType> maybe_mime_type = {});
    static GC::Ref<HTMLParser> create(DOM::Document&, StringView input, StringView encoding);

    // Runs the parser. Both overloads default to not stopping at the insertion
    // point; the second overload additionally takes a URL.
    void run(HTMLTokenizer::StopAtInsertionPoint = HTMLTokenizer::StopAtInsertionPoint::No);
    void run(URL::URL const&, HTMLTokenizer::StopAtInsertionPoint = HTMLTokenizer::StopAtInsertionPoint::No);

    // Static so it can be invoked with only a document; the parser argument is
    // optional and defaults to null.
    static void the_end(GC::Ref<DOM::Document>, GC::Ptr<HTMLParser> = nullptr);

    DOM::Document& document();

    // Selector passed to parse_html_fragment(); defaults to No there.
    enum class AllowDeclarativeShadowRoots {
        No,
        Yes,
    };
    static WebIDL::ExceptionOr<Vector<GC::Root<DOM::Node>>> parse_html_fragment(DOM::Element& context_element, StringView, AllowDeclarativeShadowRoots = AllowDeclarativeShadowRoots::No);

    // Selector passed to serialize_html_fragment().
    enum class SerializableShadowRoots {
        No,
        Yes,
    };
    static String serialize_html_fragment(DOM::Node const&, SerializableShadowRoots, Vector<GC::Root<DOM::ShadowRoot>> const&, DOM::FragmentSerializationMode = DOM::FragmentSerializationMode::Inner);

    // One enumerator per entry of ENUMERATE_INSERTION_MODES, in list order.
    enum class InsertionMode {
#define __ENUMERATE_INSERTION_MODE(mode) mode,
        ENUMERATE_INSERTION_MODES
#undef __ENUMERATE_INSERTION_MODE
    };

    InsertionMode insertion_mode() const { return m_insertion_mode; }

    static bool is_special_tag(FlyString const& tag_name, Optional<FlyString> const& namespace_);

    HTMLTokenizer& tokenizer() { return m_tokenizer; }

    // https://html.spec.whatwg.org/multipage/parsing.html#abort-a-parser
    void abort();

    bool aborted() const { return m_aborted; }
    bool stopped() const { return m_stop_parsing; }

    size_t script_nesting_level() const { return m_script_nesting_level; }

private:
    HTMLParser(DOM::Document&, StringView input, StringView encoding);
    HTMLParser(DOM::Document&);

    virtual void visit_edges(Cell::Visitor&) override;
    virtual void initialize(JS::Realm&) override;

    char const* insertion_mode_name() const;

    DOM::QuirksMode which_quirks_mode(HTMLToken const&) const;

    // Token handlers; there is one handle_* function for each InsertionMode value.
    void handle_initial(HTMLToken&);
    void handle_before_html(HTMLToken&);
    void handle_before_head(HTMLToken&);
    void handle_in_head(HTMLToken&);
    void handle_in_head_noscript(HTMLToken&);
    void handle_after_head(HTMLToken&);
    void handle_in_body(HTMLToken&);
    void handle_after_body(HTMLToken&);
    void handle_after_after_body(HTMLToken&);
    void handle_text(HTMLToken&);
    void handle_in_table(HTMLToken&);
    void handle_in_table_body(HTMLToken&);
    void handle_in_row(HTMLToken&);
    void handle_in_cell(HTMLToken&);
    void handle_in_table_text(HTMLToken&);
    void handle_in_caption(HTMLToken&);
    void handle_in_column_group(HTMLToken&);
    void handle_in_template(HTMLToken&);
    void handle_in_frameset(HTMLToken&);
    void handle_after_frameset(HTMLToken&);
    void handle_after_after_frameset(HTMLToken&);

    // Only raises the flag reported by stopped(); it does nothing else here.
    void stop_parsing() { m_stop_parsing = true; }

    void generate_implied_end_tags(FlyString const& exception = {});
    void generate_all_implied_end_tags_thoroughly();
    GC::Ref<DOM::Element> create_element_for(HTMLToken const&, Optional<FlyString> const& namespace_, DOM::Node& intended_parent);

    // Result of find_appropriate_place_for_inserting_node(): a parent node and the
    // sibling to insert before. Both are nullable pointers.
    struct AdjustedInsertionLocation {
        GC::Ptr<DOM::Node> parent;
        GC::Ptr<DOM::Node> insert_before_sibling;
    };

    AdjustedInsertionLocation find_appropriate_place_for_inserting_node(GC::Ptr<DOM::Element> override_target = nullptr);

    void insert_an_element_at_the_adjusted_insertion_location(GC::Ref<DOM::Element>);

    DOM::Text* find_character_insertion_node();
    void flush_character_insertions();
    // Selector passed to insert_foreign_element().
    enum class OnlyAddToElementStack {
        No,
        Yes,
    };
    GC::Ref<DOM::Element> insert_foreign_element(HTMLToken const&, Optional<FlyString> const& namespace_, OnlyAddToElementStack);
    GC::Ref<DOM::Element> insert_html_element(HTMLToken const&);
    [[nodiscard]] GC::Ptr<DOM::Element> current_node();
    [[nodiscard]] GC::Ptr<DOM::Element> adjusted_current_node();
    [[nodiscard]] GC::Ptr<DOM::Element> node_before_current_node();
    void insert_character(u32 data);
    void insert_comment(HTMLToken&);
    void reconstruct_the_active_formatting_elements();
    void close_a_p_element();
    void process_using_the_rules_for(InsertionMode, HTMLToken&);
    void process_using_the_rules_for_foreign_content(HTMLToken&);
    void parse_generic_raw_text_element(HTMLToken&);
    void increment_script_nesting_level();
    void decrement_script_nesting_level();
    void reset_the_insertion_mode_appropriately();

    void handle_element_popped(DOM::Element&);

    void adjust_mathml_attributes(HTMLToken&);
    void adjust_svg_tag_names(HTMLToken&);
    void adjust_svg_attributes(HTMLToken&);
    static void adjust_foreign_attributes(HTMLToken&);

    // Return value of run_the_adoption_agency_algorithm().
    // NOTE(review): this is an unscoped enum, unlike the other enums in this class.
    // It is left unscoped because call sites outside this header may use the
    // enumerators unqualified.
    enum AdoptionAgencyAlgorithmOutcome {
        DoNothing,
        RunAnyOtherEndTagSteps,
    };

    AdoptionAgencyAlgorithmOutcome run_the_adoption_agency_algorithm(HTMLToken&);
    void clear_the_stack_back_to_a_table_context();
    void clear_the_stack_back_to_a_table_body_context();
    void clear_the_stack_back_to_a_table_row_context();
    void close_the_cell();

    InsertionMode m_insertion_mode { InsertionMode::Initial };
    InsertionMode m_original_insertion_mode { InsertionMode::Initial };

    StackOfOpenElements m_stack_of_open_elements;
    Vector<InsertionMode> m_stack_of_template_insertion_modes;
    ListOfActiveFormattingElements m_list_of_active_formatting_elements;

    HTMLTokenizer m_tokenizer;

    bool m_next_line_feed_can_be_ignored { false };

    bool m_foster_parenting { false };
    bool m_frameset_ok { true };
    bool m_parsing_fragment { false };

    // https://html.spec.whatwg.org/multipage/parsing.html#scripting-flag
    // The scripting flag is set to "enabled" if scripting was enabled for the Document with which the parser is associated when the parser was created, and "disabled" otherwise.
    bool m_scripting_enabled { true };

    bool m_invoked_via_document_write { false };
    bool m_aborted { false };
    bool m_parser_pause_flag { false };
    bool m_stop_parsing { false };
    size_t m_script_nesting_level { 0 };

    JS::Realm& realm();

    GC::Ptr<DOM::Document> m_document;
    GC::Ptr<HTMLHeadElement> m_head_element;
    GC::Ptr<HTMLFormElement> m_form_element;
    GC::Ptr<DOM::Element> m_context_element;

    Vector<HTMLToken> m_pending_table_character_tokens;

    // Pending character data: text is accumulated in the UTF-16 builder.
    // NOTE(review): presumably flush_character_insertions() writes the buffered
    // text into m_character_insertion_node — confirm in the implementation.
    GC::Ptr<DOM::Text> m_character_insertion_node;
    StringBuilder m_character_insertion_builder { StringBuilder::Mode::UTF16 };
};
|
|
|
|
class HTMLParserEndState final : public JS::Cell {
|
|
GC_CELL(HTMLParserEndState, JS::Cell);
|
|
GC_DECLARE_ALLOCATOR(HTMLParserEndState);
|
|
|
|
public:
|
|
static GC::Ref<HTMLParserEndState> create(GC::Ref<DOM::Document>, GC::Ptr<HTMLParser>);
|
|
|
|
void schedule_progress_check();
|
|
|
|
private:
|
|
enum class Phase {
|
|
WaitingForDeferredScripts,
|
|
WaitingForASAPScripts,
|
|
WaitingForLoadEventDelay,
|
|
Completed,
|
|
};
|
|
|
|
HTMLParserEndState(GC::Ref<DOM::Document>, GC::Ptr<HTMLParser>);
|
|
|
|
virtual void visit_edges(Cell::Visitor&) override;
|
|
|
|
void check_progress();
|
|
void advance_to_asap_scripts_phase();
|
|
void complete();
|
|
|
|
Phase m_phase { Phase::WaitingForDeferredScripts };
|
|
bool m_check_pending { false };
|
|
|
|
GC::Ref<DOM::Document> m_document;
|
|
GC::Ptr<HTMLParser> m_parser;
|
|
GC::Ref<Platform::Timer> m_timeout;
|
|
};
|
|
|
|
RefPtr<CSS::StyleValue const> parse_dimension_value(StringView);
|
|
RefPtr<CSS::StyleValue const> parse_nonzero_dimension_value(StringView);
|
|
Optional<Color> parse_legacy_color_value(StringView);
|
|
|
|
}
|