ladybird/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp

/*
 * Copyright (c) 2020-2025, Andreas Kling <andreas@ladybird.org>
 * Copyright (c) 2021, Luke Wilde <lukew@serenityos.org>
 * Copyright (c) 2023-2024, Shannon Booth <shannon@serenityos.org>
 * Copyright (c) 2025, Lorenz Ackermann <me@lorenzackermann.xyz>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <AK/Debug.h>
#include <LibTextCodec/Decoder.h>
#include <LibWeb/Bindings/ExceptionOrUtils.h>
#include <LibWeb/Bindings/MainThreadVM.h>
#include <LibWeb/CSS/Parser/Parser.h>
#include <LibWeb/CSS/StyleValues/LengthStyleValue.h>
#include <LibWeb/CSS/StyleValues/PercentageStyleValue.h>
#include <LibWeb/DOM/Attr.h>
#include <LibWeb/DOM/Comment.h>
#include <LibWeb/DOM/Document.h>
#include <LibWeb/DOM/DocumentType.h>
#include <LibWeb/DOM/Element.h>
#include <LibWeb/DOM/ElementFactory.h>
#include <LibWeb/DOM/Event.h>
#include <LibWeb/DOM/NamedNodeMap.h>
#include <LibWeb/DOM/ProcessingInstruction.h>
#include <LibWeb/DOM/QualifiedName.h>
#include <LibWeb/DOM/ShadowRoot.h>
#include <LibWeb/DOM/Text.h>
#include <LibWeb/HTML/CustomElements/CustomElementDefinition.h>
#include <LibWeb/HTML/CustomElements/CustomElementRegistry.h>
#include <LibWeb/HTML/EventLoop/EventLoop.h>
#include <LibWeb/HTML/EventNames.h>
#include <LibWeb/HTML/HTMLFormElement.h>
#include <LibWeb/HTML/HTMLHtmlElement.h>
#include <LibWeb/HTML/HTMLLinkElement.h>
#include <LibWeb/HTML/HTMLOptionElement.h>
#include <LibWeb/HTML/HTMLScriptElement.h>
#include <LibWeb/HTML/HTMLTemplateElement.h>
#include <LibWeb/HTML/Parser/HTMLEncodingDetection.h>
#include <LibWeb/HTML/Parser/HTMLParser.h>
#include <LibWeb/HTML/Parser/HTMLToken.h>
#include <LibWeb/HTML/Parser/SpeculativeHTMLParser.h>
#include <LibWeb/HTML/Scripting/ExceptionReporter.h>
#include <LibWeb/HTML/Scripting/SimilarOriginWindowAgent.h>
#include <LibWeb/HTML/Window.h>
#include <LibWeb/HTMLTokenizerRustFFI.h>
#include <LibWeb/HighResolutionTime/TimeOrigin.h>
#include <LibWeb/Infra/CharacterTypes.h>
#include <LibWeb/Infra/Strings.h>
#include <LibWeb/Namespace.h>
#include <LibWeb/Platform/EventLoopPlugin.h>
#include <LibWeb/SVG/SVGScriptElement.h>

namespace Web::HTML {

// Allocator definitions (via GC_DEFINE_ALLOCATOR) for the two heap-allocated types implemented in this file.
GC_DEFINE_ALLOCATOR(HTMLParser);
GC_DEFINE_ALLOCATOR(HTMLParserEndState);

// Forward declarations of file-local helpers that translate between LibWeb objects/enums and the plain values
// exchanged with the Rust parser: size_t node handles, void* parser pointers and RustFfi* enums.
// NOTE(review): the definitions are outside this excerpt; confirm how handles map to objects there.
static DOM::Node& node_from_html_parser_ffi(size_t);
static HTMLParser& parser_from_html_parser_ffi(void*);
static RustFfiHtmlNamespace namespace_to_html_parser_ffi(Optional<FlyString> const&);
static RustFfiHtmlAttributeNamespace attribute_namespace_to_html_parser_ffi(Optional<FlyString> const&);
static RustFfiHtmlQuirksMode quirks_mode_to_html_parser_ffi(DOM::QuirksMode);

// C-linkage entry points, one per DOM-side operation (logging parse errors, creating/inserting/removing nodes,
// script bookkeeping, template and declarative shadow root handling). Only the declarations appear here.
// NOTE(review): presumably these are defined later in this file and called from the Rust parser. The void*
// arguments presumably carry the HTMLParser passed to rust_html_parser_run_document(), and the size_t
// arguments/return values presumably carry node handles — confirm against the definitions.
extern "C" void ladybird_html_parser_log_parse_error(void*, u8 const*, size_t);
extern "C" void ladybird_html_parser_stop_parsing(void*);
extern "C" bool ladybird_html_parser_parse_errors_enabled();
extern "C" void ladybird_html_parser_visit_node(void*, size_t);
extern "C" size_t ladybird_html_parser_document_node(void*);
extern "C" size_t ladybird_html_parser_document_html_element(void*);
extern "C" void ladybird_html_parser_set_document_quirks_mode(void*, RustFfiHtmlQuirksMode);
extern "C" size_t ladybird_html_parser_create_document_type(void*, u8 const*, size_t, u8 const*, size_t, u8 const*, size_t);
extern "C" size_t ladybird_html_parser_create_comment(void*, u8 const*, size_t);
extern "C" void ladybird_html_parser_insert_text(size_t, size_t, u8 const*, size_t);
extern "C" void ladybird_html_parser_add_missing_attribute(size_t, u8 const*, size_t, u8 const*, size_t);
extern "C" void ladybird_html_parser_remove_node(size_t);
extern "C" void ladybird_html_parser_handle_element_popped(size_t);
extern "C" void ladybird_html_parser_prepare_svg_script(void*, size_t, size_t);
extern "C" void ladybird_html_parser_set_script_source_line(void*, size_t, size_t);
extern "C" void ladybird_html_parser_mark_script_already_started(void*, size_t);
extern "C" size_t ladybird_html_parser_parent_node(size_t);
extern "C" size_t ladybird_html_parser_create_element(void*, size_t, RustFfiHtmlNamespace, u8 const*, size_t, u8 const*, size_t, RustFfiHtmlParserAttribute const*, size_t, bool, size_t, bool);
extern "C" void ladybird_html_parser_append_child(size_t, size_t);
extern "C" void ladybird_html_parser_insert_node(size_t, size_t, size_t, bool);
extern "C" void ladybird_html_parser_move_all_children(size_t, size_t);
extern "C" size_t ladybird_html_parser_template_content(size_t);
extern "C" size_t ladybird_html_parser_attach_declarative_shadow_root(size_t, RustFfiHtmlShadowRootMode, RustFfiHtmlSlotAssignmentMode, bool, bool, bool, bool);
extern "C" void ladybird_html_parser_set_template_content(size_t, size_t);
extern "C" bool ladybird_html_parser_allows_declarative_shadow_roots(size_t);

// Constructs a parser for `input`, which is handed to the tokenizer together with `encoding`.
// The parser creates its Rust parser handle, registers itself on `document`, and stores the standardized name of
// `encoding` on the document. `encoding` must be something TextCodec can standardize; otherwise the VERIFY trips.
HTMLParser::HTMLParser(DOM::Document& document, ParserScriptingMode scripting_mode, StringView input, StringView encoding)
    : m_tokenizer(input, encoding)
    , m_scripting_mode(scripting_mode)
    , m_document(document)
{
    m_rust_parser = rust_html_parser_create();
    m_document->set_parser({}, *this);

    // The document records the standardized encoding name rather than whatever spelling the caller passed in.
    auto const canonical_encoding_name = TextCodec::get_standardized_encoding(encoding);
    VERIFY(canonical_encoding_name.has_value());
    auto document_encoding = MUST(String::from_utf8(canonical_encoding_name.value()));
    m_document->set_encoding(move(document_encoding));
}

// Constructs a parser without initial input or an encoding; `script_created` records whether the parser was
// created by script. Like the other constructor, it creates the Rust parser handle and registers itself on
// `document`, but it does not touch the document's encoding.
HTMLParser::HTMLParser(DOM::Document& document, ParserScriptingMode scripting_mode, ScriptCreatedParser script_created)
    : m_scripting_mode(scripting_mode)
    , m_script_created(script_created == ScriptCreatedParser::Yes)
    , m_document(document)
{
    m_rust_parser = rust_html_parser_create();
    m_document->set_parser({}, *this);
}

// Defaulted: the Rust parser handle is released in finalize(), not here.
HTMLParser::~HTMLParser() = default;

// Releases the Rust parser handle owned by this object. The handle is nulled afterwards, so a repeated call does
// not destroy it twice.
void HTMLParser::finalize()
{
    Base::finalize();

    if (!m_rust_parser)
        return;

    rust_html_parser_destroy(m_rust_parser);
    m_rust_parser = nullptr;
}

// Reports the GC-managed objects this parser references directly to `visitor`, then hands the visitor to the Rust
// parser via rust_html_parser_visit_edges().
// NOTE(review): presumably the Rust side reports the nodes it holds through ladybird_html_parser_visit_node() — confirm.
void HTMLParser::visit_edges(Cell::Visitor& visitor)
{
    Base::visit_edges(visitor);
    visitor.visit(m_document);
    visitor.visit(m_form_element);
    visitor.visit(m_context_element);
    visitor.visit(m_active_speculative_html_parser);

    rust_html_parser_visit_edges(m_rust_parser, &visitor);
}

// Only forwards to the base class; no parser-specific setup happens here.
void HTMLParser::initialize(JS::Realm& realm)
{
    Base::initialize(realm);
}

// Drives the Rust parser over the tokenizer's input. Each pass ends in one of three ways:
// - Ok: nothing more to do for now, so the loop ends.
// - ExecuteScript / ExecuteSvgScript: the Rust parser hands back a script element whose end tag steps must run on
//   the C++ side before parsing can continue.
// The parser pause flag is checked before every pass, so a script that pauses the parser also ends the loop.
void HTMLParser::run(HTMLTokenizer::StopAtInsertionPoint stop_at_insertion_point)
{
    m_stop_parsing = false;

    while (!m_parser_pause_flag) {
        bool const scripting_enabled = m_scripting_mode != ParserScriptingMode::Disabled;
        bool const should_stop_at_insertion_point = stop_at_insertion_point == HTMLTokenizer::StopAtInsertionPoint::Yes;

        auto const outcome = rust_html_parser_run_document(
            m_rust_parser,
            m_tokenizer.ffi_handle({}),
            this,
            scripting_enabled,
            should_stop_at_insertion_point);

        if (outcome == RustFfiHtmlParserRunResult::Ok)
            break;

        if (outcome == RustFfiHtmlParserRunResult::ExecuteScript) {
            auto pending_script = rust_html_parser_take_pending_script(m_rust_parser);
            VERIFY(pending_script);
            auto& script_element = as<HTMLScriptElement>(node_from_html_parser_ffi(pending_script));
            // The return value mirrors the pause flag, which the loop condition re-checks.
            process_script_end_tag_from_rust_parser(script_element);
            continue;
        }

        if (outcome == RustFfiHtmlParserRunResult::ExecuteSvgScript) {
            auto pending_svg_script = rust_html_parser_take_pending_svg_script(m_rust_parser);
            VERIFY(pending_svg_script);
            auto& svg_script_element = as<SVG::SVGScriptElement>(node_from_html_parser_ffi(pending_svg_script));
            bool const parser_is_paused = process_svg_script_end_tag_from_rust_parser(svg_script_element);
            if (parser_is_paused)
                break;
            continue;
        }

        VERIFY_NOT_REACHED();
    }

    m_tokenizer.parser_did_run({});
}

// Convenience entry point: records `url` and the tokenizer's source on the document, then parses via
// run_until_completion().
void HTMLParser::run(URL::URL const& url, HTMLTokenizer::StopAtInsertionPoint stop_at_insertion_point)
{
    m_document->set_url(url);
    m_document->set_source(m_tokenizer.source());
    run_until_completion(stop_at_insertion_point);
}

// Forwards to the Rust parser. the_end() calls this for its "pop all the nodes off the stack of open elements" step.
void HTMLParser::pop_all_open_elements()
{
    rust_html_parser_pop_all_open_elements(m_rust_parser);
}

// Applies parser-specific setup to a freshly created element. Only HTML-namespace <link> and <script> elements
// need any; every other element is left untouched.
void HTMLParser::configure_element_created_by_rust_parser(DOM::Element& element)
{
    if (element.namespace_uri() != Namespace::HTML)
        return;

    auto const& tag_name = element.local_name();

    if (tag_name == HTML::TagNames::link) {
        // AD-HOC: Let <link> elements know which document they were originally parsed for.
        //         This is used for the render-blocking logic.
        auto& link = as<HTMLLinkElement>(element);
        link.set_parser_document({}, document());
        link.set_was_enabled_when_created_by_parser({}, !element.has_attribute(HTML::AttributeNames::disabled));
        return;
    }

    if (tag_name == HTML::TagNames::script) {
        auto& script = as<HTMLScriptElement>(element);

        // In the Fragment scripting mode the script gets no parser document.
        if (m_scripting_mode != ParserScriptingMode::Fragment)
            script.set_parser_document(Badge<HTMLParser> {}, document());

        script.set_force_async(Badge<HTMLParser> {}, false);

        // In the Inert scripting mode the script is marked as already started.
        if (m_scripting_mode == ParserScriptingMode::Inert)
            script.set_already_started(Badge<HTMLParser> {}, true);
    }
}

// Creates the element for `token` on behalf of the Rust parser and applies the follow-up work that depends on
// parser state: link/script configuration, the duplicate-attribute marker, and association with the current form
// element pointer (`form_element`) when no template element is on the stack of open elements.
GC::Ref<DOM::Element> HTMLParser::create_element_for_rust_parser(HTMLToken const& token, Optional<FlyString> const& namespace_, DOM::Node& intended_parent, bool had_duplicate_attribute, GC::Ptr<HTMLFormElement> form_element, bool has_template_element_on_stack)
{
    auto element = create_element_for(token, namespace_, intended_parent);
    configure_element_created_by_rust_parser(element);

    // AD-HOC: See AD-HOC comment on Element.m_had_duplicate_attribute_during_tokenization about why this is done.
    if (had_duplicate_attribute)
        element->set_had_duplicate_attribute_during_tokenization({});

    bool const form_pointer_is_usable = form_element && !has_template_element_on_stack;
    if (form_pointer_is_usable) {
        // Only form-associated elements that are not form-associated custom elements are candidates.
        if (auto* candidate = as_if<HTML::HTMLElement>(*element);
            candidate && candidate->is_form_associated_element() && !candidate->is_form_associated_custom_element()) {
            // The element must be either not listed or without a form attribute, and the intended parent must be
            // in the same tree as the form element.
            bool const lacks_explicit_form_owner = !candidate->is_listed() || !candidate->has_attribute(HTML::AttributeNames::form);
            if (lacks_explicit_form_owner && &intended_parent.root() == &form_element->root()) {
                candidate->set_form(form_element.ptr());
                candidate->set_parser_inserted({});
            }
        }
    }

    return element;
}

// Runs the "script" end tag steps for an HTML script element handed back by the Rust parser (see run()).
// Returns the parser pause flag as it stands when the steps finish: true means the caller must stop driving the
// tokenizer for now, false means parsing may continue right away.
bool HTMLParser::process_script_end_tag_from_rust_parser(HTMLScriptElement& script)
{
    // If the active speculative HTML parser is null and the JavaScript execution context stack is empty, then perform a microtask checkpoint.
    // The active speculative HTML parser is null here; start/stop are paired around the spin_until below.
    // NOTE(review): this function no longer contains a spin_until; the wait is implemented asynchronously via
    //               schedule_resume_check() further down. Confirm the start/stop pairing claim still holds.
    auto& vm = main_thread_event_loop().vm();
    if (!vm.has_running_execution_context())
        perform_a_microtask_checkpoint();

    // Let the old insertion point have the same value as the current insertion point.
    m_tokenizer.store_old_insertion_point();

    // Let the insertion point be just before the next input character.
    m_tokenizer.update_insertion_point();

    // Increment the parser's script nesting level by one.
    increment_script_nesting_level();

    // https://w3c.github.io/trusted-types/dist/spec/#setting-slot-values-from-parser
    // Set script’s script text value to its child text content.
    script.set_string_text(script.child_text_content());

    // If the active speculative HTML parser is null, then prepare the script element script.
    // This might cause some script to execute, which might cause new characters to be inserted into the tokenizer,
    // and might cause the tokenizer to output more tokens, resulting in a reentrant invocation of the parser.
    // The active speculative HTML parser is null here (see above).
    script.prepare_script(Badge<HTMLParser> {});

    // Decrement the parser's script nesting level by one.
    decrement_script_nesting_level();

    // If the parser's script nesting level is zero, then set the parser pause flag to false.
    if (script_nesting_level() == 0)
        m_parser_pause_flag = false;

    // Let the insertion point have the value of the old insertion point.
    m_tokenizer.restore_old_insertion_point();

    // At this stage, if the pending parsing-blocking script is not null, then:
    if (document().pending_parsing_blocking_script()) {
        // -> If the script nesting level is not zero:
        if (script_nesting_level() != 0) {
            // Set the parser pause flag to true,
            m_parser_pause_flag = true;
            // and abort the processing of any nested invocations of the tokenizer, yielding control back to the caller.
            // (Tokenization will resume when the caller returns to the "outer" tree construction stage.)
            return true;
        }

        // -> Otherwise:
        // The spec's "While the pending parsing-blocking script is not null" loop and the contained "spin the event
        // loop" step are implemented asynchronously: pause the parser, schedule a resume check, and yield back to
        // the caller. The remaining steps (4-13) run from resume_after_parser_blocking_script when the script is
        // ready.

        // 3. Start the speculative HTML parser for this instance of the HTML parser.
        start_the_speculative_html_parser();

        m_parser_pause_flag = true;
        schedule_resume_check();
    }

    // Either false (nothing is blocking) or true (set just above while waiting for the blocking script).
    return m_parser_pause_flag;
}

// Prepares an SVG script element created by the Rust parser: marks it parser-inserted (outside the Fragment
// scripting mode) and records the source line it came from.
void HTMLParser::prepare_svg_script_for_rust_parser(SVG::SVGScriptElement& script, size_t source_line_number)
{
    // https://html.spec.whatwg.org/multipage/parsing.html#scripting-mode
    // The Fragment scripting mode treats parser-inserted scripts as if they were not parser-inserted, allowing, for
    // example, executing scripts when applying a fragment created by createContextualFragment().
    bool const counts_as_parser_inserted = m_scripting_mode != ParserScriptingMode::Fragment;

    // AD-HOC: For SVG script elements, set the parser-inserted flag before the element is inserted into the DOM.
    // Otherwise inserted()/attribute_changed() would invoke process_the_script_element() with the flag still unset
    // and bypass the parser-blocking fetch handling.
    if (counts_as_parser_inserted)
        script.set_parser_inserted({});

    script.set_source_line_number({}, source_line_number);
}

// Records `source_line_number` on `element` when it is an HTML or SVG script element; any other element is ignored.
void HTMLParser::set_script_source_line_from_rust_parser(DOM::Element& element, size_t source_line_number)
{
    if (auto* html_script = as_if<HTML::HTMLScriptElement>(element))
        html_script->set_source_line_number({}, source_line_number);
    else if (auto* svg_script = as_if<SVG::SVGScriptElement>(element))
        svg_script->set_source_line_number({}, source_line_number);
}

// Calls set_already_started(true) on `script` on behalf of the Rust parser.
void HTMLParser::mark_script_already_started_from_rust_parser(HTMLScriptElement& script)
{
    script.set_already_started(Badge<HTMLParser> {}, true);
}

// Thin forwarding wrapper around stop_parsing().
// NOTE(review): presumably the target of ladybird_html_parser_stop_parsing() — confirm against its definition.
void HTMLParser::stop_parsing_from_rust_parser()
{
    stop_parsing();
}

// Runs the end tag steps for an SVG script element handed back by the Rust parser (see run()).
// Returns the parser pause flag as it stands when the steps finish: true means the caller must stop driving the
// tokenizer for now, false means parsing may continue right away.
bool HTMLParser::process_svg_script_end_tag_from_rust_parser(SVG::SVGScriptElement& script)
{
    // Let the old insertion point have the same value as the current insertion point.
    m_tokenizer.store_old_insertion_point();

    // Let the insertion point be just before the next input character.
    m_tokenizer.update_insertion_point();

    // Increment the parser's script nesting level by one.
    increment_script_nesting_level();

    // Set the parser pause flag to true.
    m_parser_pause_flag = true;

    // If the active speculative HTML parser is null and the user agent supports SVG, then Process the SVG script element according to the SVG rules. [SVG]
    // The active speculative HTML parser is null here.
    script.process_the_script_element();

    // Decrement the parser's script nesting level by one.
    decrement_script_nesting_level();

    // If the parser's script nesting level is zero, then set the parser pause flag to false.
    if (script_nesting_level() == 0)
        m_parser_pause_flag = false;

    // Let the insertion point have the value of the old insertion point.
    m_tokenizer.restore_old_insertion_point();

    // If the SVG script registered itself as a pending parsing-blocking script (external fetch in flight),
    // pause the parser and schedule a resume check. The parser will resume from
    // resume_after_parser_blocking_script when the fetch completes.
    if (document().pending_parsing_blocking_svg_script()) {
        m_parser_pause_flag = true;
        schedule_resume_check();
    }

    // True when still nested inside another script or when waiting on the blocking SVG script; false otherwise.
    return m_parser_pause_flag;
}

// Installs the post-parse action (running the_end() for this parser's document), runs the parser, and invokes the
// action right away unless the parser ended up paused.
// NOTE(review): when the parser is paused, the action is presumably invoked later by the resume path, which is not
//               visible here — confirm.
void HTMLParser::run_until_completion(HTMLTokenizer::StopAtInsertionPoint stop_at_insertion_point)
{
    m_post_parse_action = [this] { the_end(*m_document, this); };
    run(stop_at_insertion_point);
    if (!m_parser_pause_flag)
        invoke_post_parse_action();
}

// https://html.spec.whatwg.org/multipage/parsing.html#the-end
// `document` is the document whose parsing has stopped. `parser` is the parser that produced it, or null when these
// steps run without an HTML parser (see the NOTE below); every parser-specific step is skipped in that case.
// Steps 1-4 run synchronously here; steps 5-11 are driven by an HTMLParserEndState stored on the document.
void HTMLParser::the_end(GC::Ref<DOM::Document> document, GC::Ptr<HTMLParser> parser)
{
    // Once the user agent stops parsing the document, the user agent must run the following steps:

    // NOTE: This is a static method because the spec sometimes wants us to "act as if the user agent had stopped
    //       parsing document" which means running these steps without an HTML Parser. That makes it awkward to call,
    //       but it's preferable to duplicating so much code.

    if (parser)
        VERIFY(document == parser->m_document);

    // The entirety of "the end" should be a no-op for HTML fragment parsers, because:
    // - the temporary document is not accessible, making the DOMContentLoaded event and "ready for post load tasks" do
    //   nothing, making the parser not re-entrant from document.{open,write,close} and document.readyState inaccessible
    // - there is no Window associated with it and no associated browsing context with the temporary document (meaning
    //   the Window load event is skipped and making the load timing info inaccessible)
    // - scripts are not able to be prepared, meaning the script queues are empty.
    // However, the unconditional "spin the event loop" invocations cause two issues:
    // - Microtask timing is changed, as "spin the event loop" performs an unconditional microtask checkpoint, causing
    //   things to happen out of order. For example, YouTube sets the innerHTML of a <template> element in the constructor
    //   of the ytd-app custom element _before_ setting up class attributes. Since custom elements use microtasks to run
    //   callbacks, this causes custom element callbacks that rely on attributes setup by the constructor to run before
    //   the attributes are set up, causing unhandled exceptions.
    // - Load event delaying can spin forever, e.g. if the fragment contains an <img> element which stops delaying the
    //   load event from an element task. Since tasks are not considered runnable if they're from a document with no
    //   browsing context (i.e. the temporary document made for innerHTML), the <img> element will forever delay the load
    //   event and cause an infinite loop.
    // We can avoid these issues and also avoid doing unnecessary work by simply skipping "the end" for HTML fragment
    // parsers.
    // See the message of the commit that added this for more details.
    if (parser && parser->m_parsing_fragment)
        return;

    // 1. If the active speculative HTML parser is not null, then stop the speculative HTML parser and return.
    if (parser && parser->m_active_speculative_html_parser) {
        parser->stop_the_speculative_html_parser();
        return;
    }

    // 2. Set the insertion point to undefined.
    if (parser)
        parser->m_tokenizer.undefine_insertion_point();

    // 3. Update the current document readiness to "interactive".
    document->update_readiness(HTML::DocumentReadyState::Interactive);

    // 4. Pop all the nodes off the stack of open elements.
    if (parser)
        parser->pop_all_open_elements();

    // AD-HOC: Skip remaining steps when there's no browsing context.
    // This happens when parsing HTML via DOMParser or similar mechanisms.
    // Note: This diverges from the spec, which expects more steps to follow.
    if (!document->browsing_context()) {
        // Parsed via DOMParser, no need to wait for load events.
        document->update_readiness(HTML::DocumentReadyState::Complete);
        return;
    }

    // Steps 5-11 are handled by the HTMLParserEndState state machine.
    auto state = HTMLParserEndState::create(document, parser);
    document->set_html_parser_end_state(state);
    state->schedule_progress_check();
}

// Delay, in milliseconds, of the single-shot timer started by HTMLParserEndState's constructor. When it fires
// before the state has completed, the current phase is logged; nothing is forced to complete.
static constexpr int THE_END_TIMEOUT_MS = 15000;

// Drains pending microtasks before parser progress is checked, matching spin_until's pre-check semantics: work such
// as image load-event delayer creation (update_the_image_data step 8) must have happened before the check looks at
// the document. When the microtask queue is empty nothing is done at all, which skips the save/clear/restore of the
// execution context stack and notify_about_rejected_promises.
static void perform_pre_progress_microtask_checkpoint()
{
    auto& loop = main_thread_event_loop();

    if (!loop.microtask_queue_empty()) {
        // Run the checkpoint with a cleared execution context stack, then put the saved stack back.
        auto& vm = loop.vm();
        vm.save_execution_context_stack();
        vm.clear_execution_context_stack();
        loop.perform_a_microtask_checkpoint();
        vm.restore_execution_context_stack();
    }
}

// Allocates a new end state for `document` on the document's heap. `parser` may be null (see HTMLParser::the_end()).
GC::Ref<HTMLParserEndState> HTMLParserEndState::create(GC::Ref<DOM::Document> document, GC::Ptr<HTMLParser> parser)
{
    return document->heap().allocate<HTMLParserEndState>(document, parser);
}

// Stores the document and (possibly null) parser, and starts a single-shot timer of THE_END_TIMEOUT_MS.
// The timer callback is diagnostic only: if the state has not reached Phase::Completed when it fires, the current
// phase is logged. It does not advance or complete the state machine.
HTMLParserEndState::HTMLParserEndState(GC::Ref<DOM::Document> document, GC::Ptr<HTMLParser> parser)
    : m_document(document)
    , m_parser(parser)
    , m_timeout(Platform::Timer::create_single_shot(heap(), THE_END_TIMEOUT_MS, GC::create_function(heap(), [this] {
        if (m_phase != Phase::Completed)
            dbgln("HTMLParserEndState: timed out in phase {}", to_underlying(m_phase));
    })))
{
    m_timeout->start();
}

// Reports the document, the parser and the timeout timer to `visitor`.
void HTMLParserEndState::visit_edges(Cell::Visitor& visitor)
{
    Base::visit_edges(visitor);
    visitor.visit(m_document);
    visitor.visit(m_parser);
    visitor.visit(m_timeout);
}

// Requests a deferred call to check_progress(). Requests are coalesced: nothing is scheduled once the state has
// completed or while a previously scheduled check is still pending.
void HTMLParserEndState::schedule_progress_check()
{
    if (m_phase == Phase::Completed)
        return;
    if (m_check_pending)
        return;
    m_check_pending = true;
    Platform::EventLoopPlugin::the().deferred_invoke(GC::create_function(heap(), [this] {
        // Drain pending microtasks first so the check sees their effects.
        perform_pre_progress_microtask_checkpoint();
        check_progress();
        // The flag is cleared only after the check has run, so schedule_progress_check() calls made while the
        // checkpoint or check_progress() is running do not queue another check.
        m_check_pending = false;
    }));
}

// Advances the end-of-parsing state machine as far as the document's current state allows.
// Each phase returns early when the condition it waits for is not met yet, leaving m_phase unchanged so that a
// later call resumes from the same phase. When a phase's condition is met, control falls through to the next one.
void HTMLParserEndState::check_progress()
{
    // AD-HOC: Bail out if the document is no longer fully active (e.g. navigated away from).
    if (!m_document->is_fully_active()) {
        complete();
        return;
    }

    switch (m_phase) {
    case Phase::WaitingForDeferredScripts:
        // 5. While the list of scripts that will execute when the document has finished parsing is not empty:
        while (!m_document->scripts_to_execute_when_parsing_has_finished().is_empty()) {
            auto& first_script = *m_document->scripts_to_execute_when_parsing_has_finished().first();

            // 1. Spin the event loop until the first script in the list of scripts that will execute when the document has finished parsing
            //    has its "ready to be parser-executed" flag set and the parser's Document has no style sheet that is blocking scripts.
            if (!first_script.is_ready_to_be_parser_executed() || m_document->has_a_style_sheet_that_is_blocking_scripts())
                return;

            // 2. Execute the first script in the list of scripts that will execute when the document has finished parsing.
            first_script.execute_script();

            // 3. Remove the first script element from the list of scripts that will execute when the document has finished parsing (i.e. shift out the first entry in the list).
            (void)m_document->scripts_to_execute_when_parsing_has_finished().take_first();
        }

        // Queues the DOMContentLoaded task (step 6) and moves m_phase to WaitingForASAPScripts.
        advance_to_asap_scripts_phase();
        [[fallthrough]];

    case Phase::WaitingForASAPScripts:
        // 7. Spin the event loop until the set of scripts that will execute as soon as possible and the list of scripts
        //    that will execute in order as soon as possible are empty.
        if (!m_document->scripts_to_execute_as_soon_as_possible().is_empty()
            || !m_document->scripts_to_execute_in_order_as_soon_as_possible().is_empty())
            return;

        m_phase = Phase::WaitingForLoadEventDelay;
        [[fallthrough]];

    case Phase::WaitingForLoadEventDelay:
        // 8. Spin the event loop until there is nothing that delays the load event in the Document.
        if (m_document->anything_is_delaying_the_load_event())
            return;

        m_phase = Phase::Completed;
        [[fallthrough]];

    case Phase::Completed:
        // Steps 9-11 live in complete().
        complete();
        return;
    }
}

// Transition out of the deferred-scripts phase: scrolls to the fragment if there is an indicated element, queues
// the task that fires DOMContentLoaded (step 6), and moves m_phase to WaitingForASAPScripts.
void HTMLParserEndState::advance_to_asap_scripts_phase()
{
    // AD-HOC: We need to scroll to the fragment on page load somewhere.
    // But a script that ran in step 5 above may have scrolled the page already,
    // so only do this if there is an actual fragment to avoid resetting the scroll position unexpectedly.
    // Spec bug: https://github.com/whatwg/html/issues/10914
    auto indicated_part = m_document->determine_the_indicated_part();
    if (indicated_part.has<DOM::Element*>() && indicated_part.get<DOM::Element*>() != nullptr) {
        m_document->scroll_to_the_fragment();
    }

    // 6. Queue a global task on the DOM manipulation task source given the Document's relevant global object to run the following substeps:
    queue_global_task(HTML::Task::Source::DOMManipulation, *m_document, GC::create_function(m_document->heap(), [document = m_document] {
        // 1. Set the Document's load timing info's DOM content loaded event start time to the current high resolution time given the Document's relevant global object.
        document->load_timing_info().dom_content_loaded_event_start_time = HighResolutionTime::current_high_resolution_time(relevant_global_object(*document));

        // 2. Fire an event named DOMContentLoaded at the Document object, with its bubbles attribute initialized to true.
        auto content_loaded_event = DOM::Event::create(document->realm(), HTML::EventNames::DOMContentLoaded);
        content_loaded_event->set_bubbles(true);
        document->dispatch_event(content_loaded_event);

        // 3. Set the Document's load timing info's DOM content loaded event end time to the current high resolution time given the Document's relevant global object.
        document->load_timing_info().dom_content_loaded_event_end_time = HighResolutionTime::current_high_resolution_time(relevant_global_object(*document));

        // FIXME: 4. Enable the client message queue of the ServiceWorkerContainer object whose associated service worker client is the Document object's relevant settings object.

        // FIXME: 5. Invoke WebDriver BiDi DOM content loaded with the Document's browsing context, and a new WebDriver BiDi navigation status whose id is the Document object's navigation id, status is "pending", and url is the Document object's URL.
    }));

    m_phase = Phase::WaitingForASAPScripts;
}

// Finishes the end-of-parsing sequence: marks the state completed, stops the timeout timer, detaches this state
// from the document, queues the task that updates readiness to "complete" and fires the load/pageshow events
// (step 9), and finally flags the document as ready for post-load tasks (step 11).
// check_progress() also uses this as its bail-out path when the document is no longer fully active.
void HTMLParserEndState::complete()
{
    m_phase = Phase::Completed;
    m_timeout->stop();
    m_document->set_html_parser_end_state(nullptr);

    // 9. Queue a global task on the DOM manipulation task source given the Document's relevant global object to run the following steps:
    queue_global_task(HTML::Task::Source::DOMManipulation, *m_document, GC::create_function(m_document->heap(), [document = m_document, parser = m_parser] {
        // 1. Update the current document readiness to "complete".
        document->update_readiness(HTML::DocumentReadyState::Complete);

        // AD-HOC: We need to wait until the document ready state is complete before detaching the parser, otherwise the DOM complete time will not be set correctly.
        if (parser)
            document->detach_parser();

        // 2. If the Document object's browsing context is null, then abort these steps.
        if (!document->browsing_context())
            return;

        // 3. Let window be the Document's relevant global object.
        auto& window = as<Window>(relevant_global_object(*document));

        // 4. Set the Document's load timing info's load event start time to the current high resolution time given window.
        document->load_timing_info().load_event_start_time = HighResolutionTime::current_high_resolution_time(window);

        // 5. Fire an event named load at window, with legacy target override flag set.
        // FIXME: The legacy target override flag is currently set by a virtual override of dispatch_event()
        //        We should reorganize this so that the flag appears explicitly here instead.
        window.dispatch_event(DOM::Event::create(document->realm(), HTML::EventNames::load));

        // FIXME: 6. Invoke WebDriver BiDi load complete with the Document's browsing context, and a new WebDriver BiDi navigation status whose id is the Document object's navigation id, status is "complete", and url is the Document object's URL.

        // FIXME: 7. Set the Document object's navigation id to null.

        // 8. Set the Document's load timing info's load event end time to the current high resolution time given window.
        document->load_timing_info().load_event_end_time = HighResolutionTime::current_high_resolution_time(window);

        // 9. Assert: Document's page showing is false.
        VERIFY(!document->page_showing());

        // 10. Set the Document's page showing to true.
        document->set_page_showing(true);

        // 11. Fire a page transition event named pageshow at window with false.
        window.fire_a_page_transition_event(HTML::EventNames::pageshow, false);

        // 12. Completely finish loading the Document.
        document->completely_finish_loading();

        // FIXME: 13. Queue the navigation timing entry for the Document.
    }));

    // FIXME: 10. If the Document's print when loaded flag is set, then run the printing steps.

    // 11. The Document is now ready for post-load tasks.
    m_document->set_ready_for_post_load_tasks(true);
}
// https://html.spec.whatwg.org/multipage/parsing.html#create-an-element-for-the-token
// Creates the DOM element for `token` in `namespace_`, using the node document of `intended_parent`, and copies the
// token's attributes onto it. Numbered comments follow the spec steps; blocks marked AD-HOC are not spec steps.
// When a custom element definition is found and this is not a fragment parser, element creation is bracketed by a
// fresh custom element reaction queue so that reactions are invoked before the element is returned.
GC::Ref<DOM::Element> HTMLParser::create_element_for(HTMLToken const& token, Optional<FlyString> const& namespace_, DOM::Node& intended_parent)
{
    // 1. If the active speculative HTML parser is not null, then return the result of creating a speculative mock element given namespace, token's tag name, and token's attributes.
    // The active speculative HTML parser runs synchronously to completion, so it is null whenever the real
    // parser invokes this algorithm. The speculative parser produces mock elements via its own path.

    // 2. Otherwise, optionally create a speculative mock element given namespace, token's tag name, and token's attributes.
    // We deliberately skip step 2 — the active speculative parser already issues these fetches, so doing it
    // again here would be redundant.

    // 3. Let document be intendedParent's node document.
    GC::Ref<DOM::Document> document = intended_parent.document();

    // 4. Let localName be token's tag name.
    auto const& local_name = token.tag_name();

    // 5. Let is be the value of the "is" attribute in token, if such an attribute exists; otherwise null.
    auto is_value = token.attribute(AttributeNames::is);

    // 6. Let registry be the result of looking up a custom element registry given intendedParent.
    auto registry = look_up_a_custom_element_registry(intended_parent);

    // 7. Let definition be the result of looking up a custom element definition given registry, namespace, localName,
    //    and is.
    auto definition = look_up_a_custom_element_definition(registry, namespace_, local_name, is_value);

    // 8. Let willExecuteScript be true if definition is non-null and the parser was not created as part of the HTML
    //    fragment parsing algorithm; otherwise false.
    bool will_execute_script = definition && !m_parsing_fragment;

    // 9. If willExecuteScript is true:
    //    (Everything done here is undone in step 12 below: the counter is decremented and the queue is popped.)
    if (will_execute_script) {
        // 1. Increment document's throw-on-dynamic-markup-insertion counter.
        document->increment_throw_on_dynamic_markup_insertion_counter({});

        // 2. If the JavaScript execution context stack is empty, then perform a microtask checkpoint.
        auto& vm = main_thread_event_loop().vm();
        if (!vm.has_running_execution_context())
            perform_a_microtask_checkpoint();

        // 3. Push a new element queue onto document's relevant agent's custom element reactions stack.
        relevant_similar_origin_window_agent(document).custom_element_reactions_stack.element_queue_stack.append({});
    }

    // 10. Let element be the result of creating an element given document, localName, namespace, null, is,
    //     willExecuteScript, and registry.
    // NOTE: An error from create_element() is not propagated to the caller here; see the FIXME embedded in
    //       release_value_but_fixme_should_propagate_errors().
    auto element = create_element(*document, local_name, namespace_, {}, is_value, will_execute_script, registry).release_value_but_fixme_should_propagate_errors();

    // AD-HOC: See AD-HOC comment on Element.m_had_duplicate_attribute_during_tokenization about why this is done.
    if (token.had_duplicate_attribute()) {
        element->set_had_duplicate_attribute_during_tokenization({});
    }

    // AD-HOC: Let <link> elements know which document they were originally parsed for.
    //         This is used for the render-blocking logic.
    //         This runs before the token's attributes are appended in step 11, so the "disabled" state is read
    //         from the token rather than from the element.
    if (local_name == HTML::TagNames::link && namespace_ == Namespace::HTML) {
        auto& link_element = as<HTMLLinkElement>(*element);
        link_element.set_parser_document({}, document);
        link_element.set_was_enabled_when_created_by_parser({}, !token.has_attribute(HTML::AttributeNames::disabled));
    }

    // 11. Append each attribute in the given token to element.
    //     Each token attribute becomes a new DOM::Attr owned by element, keeping its local name, prefix and namespace.
    token.for_each_attribute([&](auto const& attribute) {
        DOM::QualifiedName qualified_name { attribute.local_name, attribute.prefix, attribute.namespace_ };
        auto dom_attribute = realm().create<DOM::Attr>(*document, move(qualified_name), attribute.value, element);
        element->append_attribute(dom_attribute);
        return IterationDecision::Continue;
    });

    // AD-HOC: The muted attribute on media elements is only set if the muted content attribute is present when the element is first created.
    //         This has to come after step 11, because it inspects the attributes that were just appended.
    if (element->is_html_media_element() && namespace_ == Namespace::HTML) {
        // https://html.spec.whatwg.org/multipage/media.html#user-interface:attr-media-muted
        // When a media element is created, if the element has a muted content attribute specified, then the muted IDL
        // attribute should be set to true; otherwise, the user agents may set the value to the user's preferred value.
        if (element->has_attribute(HTML::AttributeNames::muted)) {
            auto& media_element = as<HTMLMediaElement>(*element);
            media_element.set_muted(true);
        }
    }

    // 12. If willExecuteScript is true:
    if (will_execute_script) {
        // 1. Let queue be the result of popping from document's relevant agent's custom element reactions stack.
        //    (This will be the same element queue as was pushed above.)
        auto queue = relevant_similar_origin_window_agent(document).custom_element_reactions_stack.element_queue_stack.take_last();

        // 2. Invoke custom element reactions in queue.
        Bindings::invoke_custom_element_reactions(queue);

        // 3. Decrement document's throw-on-dynamic-markup-insertion counter.
        document->decrement_throw_on_dynamic_markup_insertion_counter({});
    }

    // FIXME: 13. If element has an xmlns attribute in the XMLNS namespace whose value is not exactly the same as the element's namespace, that is a parse error.
    //            Similarly, if element has an xmlns:xlink attribute in the XMLNS namespace whose value is not the XLink Namespace, that is a parse error.

    // Only HTML elements that are form-associated (and not form-associated custom elements) are considered for reset.
    if (auto* html_element = as_if<HTML::HTMLElement>(*element)) {
        if (html_element->is_form_associated_element() && !html_element->is_form_associated_custom_element()) {
            // 14. If element is a resettable element and not a form-associated custom element, then invoke its reset algorithm.
            //     (This initializes the element's value and checkedness based on the element's attributes.)
            if (html_element->is_resettable())
                html_element->reset_algorithm();
        }
    }

    // NOTE(review): There is no step 15 in this function (the comments jump from 14 to 16). Presumably the
    //               association with the form element pointer is handled elsewhere — confirm.

    // 16. Return element.
    return element;
}

// Queues a single deferred attempt to resume the parser after a parser-blocking script.
// Does nothing when a check is already queued or when the parser is not paused.
void HTMLParser::schedule_resume_check()
{
    if (m_resume_check_pending || !m_parser_pause_flag)
        return;

    // Mark the check as queued so repeated calls coalesce until the callback has run.
    m_resume_check_pending = true;

    auto resume_check = GC::create_function(heap(), [this] {
        // Clear the marker first so the resume attempt itself may schedule another check.
        m_resume_check_pending = false;
        perform_pre_progress_microtask_checkpoint();
        resume_after_parser_blocking_script();
    });
    Platform::EventLoopPlugin::the().deferred_invoke(resume_check);
}

// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-incdata
// Async equivalent of "spin the event loop until ... ready to be parser-executed" from the per-iteration block of the
// "text" insertion mode (steps 4-13). Driven by schedule_resume_check.
//
// Returns without doing anything unless the parser is paused, not aborted/stopped, has a pending parsing-blocking
// (HTML or SVG) script that is ready, and no style sheet is blocking scripts. Otherwise it executes that script,
// then either pauses again for the next pending script or continues parsing via run().
// NOTE: The numbered comments below refer to spec steps and intentionally do not appear in numeric order.
void HTMLParser::resume_after_parser_blocking_script()
{
    // Resuming only makes sense while the parser is paused.
    if (!m_parser_pause_flag)
        return;
    // Nothing to resume once the parser has been aborted or told to stop parsing.
    if (m_aborted || m_stop_parsing)
        return;

    // Look at the pending HTML script first and fall back to the pending SVG script.
    // With neither present there is nothing to wait for, so bail out.
    auto pending = document().pending_parsing_blocking_script();
    auto pending_svg = document().pending_parsing_blocking_svg_script();
    bool ready = false;
    if (pending)
        ready = pending->is_ready_to_be_parser_executed();
    else if (pending_svg)
        ready = pending_svg->is_ready_to_be_parser_executed();
    else
        return;

    // 5. If the parser's Document has a style sheet that is blocking scripts or the script's ready to be
    //    parser-executed is false: spin the event loop until the parser's Document has no style sheet that is blocking
    //    scripts and the script's ready to be parser-executed becomes true.
    // The async equivalent: return without taking the script; schedule_resume_check re-fires this method when the
    // relevant state changes.
    if (m_document->has_a_style_sheet_that_is_blocking_scripts())
        return;
    if (!ready)
        return;

    // 3. Start the speculative HTML parser for this instance of the HTML parser.
    // (Done at the pause point in the corresponding insertion-mode handler, so that speculation runs during the wait.)

    // 4. Block the tokenizer for this instance of the HTML parser, such that the event loop will not run tasks that
    //    invoke the tokenizer.
    // (No-op: pausing is expressed by returning from run() and m_parser_pause_flag, not a tokenizer-level block flag.)

    // 6. If this parser has been aborted in the meantime, return.
    // NOTE: m_aborted was already checked on entry; this second check mirrors the spec step. Presumably it cannot
    //       have changed in between — confirm before removing.
    if (m_aborted)
        return;

    // 7. Stop the speculative HTML parser for this instance of the HTML parser.
    stop_the_speculative_html_parser();

    // 8. Unblock the tokenizer for this instance of the HTML parser, such that tasks that invoke the tokenizer can
    //    again be run. (No-op, see step 4.)

    // 9. Let the insertion point be just before the next input character.
    m_tokenizer.update_insertion_point();

    // 10. Increment the parser's script nesting level by one (it should be zero before this step, so this sets it to
    //     one).
    VERIFY(script_nesting_level() == 0);
    increment_script_nesting_level();

    // Step 8 unblocked the tokenizer above. Our async "spin the event loop" implementation uses the parser pause flag
    // to yield while waiting for the pending script, so clear it before executing the script. This allows
    // document.write() calls made by the script to synchronously re-enter the parser up to the insertion point.
    m_parser_pause_flag = false;

    // 1. Let the script be the pending parsing-blocking script.
    // 2. Set the pending parsing-blocking script to null.
    // 11. Execute the script element the script.
    // Taking the script from the document and executing it happen in one expression; which of the two pending
    // slots is used matches the choice made when computing `ready` above.
    if (pending)
        document().take_pending_parsing_blocking_script({})->execute_script();
    else
        document().take_pending_parsing_blocking_svg_script({})->execute_pending_parser_blocking_script({});

    // 12. Decrement the parser's script nesting level by one.
    decrement_script_nesting_level();

    // If the parser's script nesting level is zero (which it always should be at this point), then set the parser pause
    // flag to false.
    VERIFY(script_nesting_level() == 0);
    m_parser_pause_flag = false;

    // 13. Let the insertion point be undefined again.
    m_tokenizer.undefine_insertion_point();

    // The spec's loop would handle the next pending parsing-blocking script before continuing normal tokenization.
    // In this async implementation, pause again and resume when that next script is ready.
    if (document().has_pending_parsing_blocking_script()) {
        m_parser_pause_flag = true;
        schedule_resume_check();
        return;
    }

    // The spec's "While the pending parsing-blocking script is not null" iteration is realized by run() pausing again
    // on the next </script> end tag if the executed script set up a new pending blocking script (e.g. via
    // document.write).
    run();

    // If the pause flag is set again after run(), the post-parse action is left for a later resume.
    if (m_parser_pause_flag)
        return;

    invoke_post_parse_action();
}

// Runs the stored post-parse action at most once: the member is reset to null before the action is invoked.
void HTMLParser::invoke_post_parse_action()
{
    auto pending_action = exchange(m_post_parse_action, nullptr);
    if (!pending_action)
        return;
    pending_action();
}

// Raises the parser's script nesting level by one.
void HTMLParser::increment_script_nesting_level()
{
    m_script_nesting_level += 1;
}

// Lowers the parser's script nesting level by one; the level must be non-zero when this is called.
void HTMLParser::decrement_script_nesting_level()
{
    VERIFY(m_script_nesting_level);
    m_script_nesting_level -= 1;
}

// Returns the document this parser is associated with.
DOM::Document& HTMLParser::document()
{
    auto& associated_document = *m_document;
    return associated_document;
}

// https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments
// Parses `markup` as an HTML fragment in the context of `context_element`. Parsing happens inside a temporary
// document under a synthetic <html> root; the root's children are then detached, adopted into the context element's
// document and returned in tree order.
WebIDL::ExceptionOr<Vector<GC::Root<DOM::Node>>> HTMLParser::parse_html_fragment(DOM::Element& context_element, StringView markup, AllowDeclarativeShadowRoots allow_declarative_shadow_roots, ParserScriptingMode scripting_mode)
{
    // 1. Assert: scriptingMode is either Inert or Fragment.
    VERIFY(scripting_mode == HTML::ParserScriptingMode::Inert || scripting_mode == HTML::ParserScriptingMode::Fragment);

    // 2. Let document be a Document node whose type is "html".
    auto temp_document = DOM::Document::create_for_fragment_parsing(context_element.realm());
    temp_document->set_document_type(DOM::Document::Type::HTML);

    // 3. Let contextDocument be context's node document.
    auto& context_document = context_element.document();

    // AD-HOC: We set the about base URL of the document to the same as the context element's document.
    //         This is required for Document::parse_url() to work inside iframe srcdoc documents.
    //         Spec issue: https://github.com/whatwg/html/issues/12210
    temp_document->set_about_base_url(context_document.about_base_url());

    // 4. If contextDocument is in quirks mode, then set document's mode to "quirks".
    // 5. Otherwise, if context's node document is in limited-quirks mode, then set document's mode to "limited-quirks".
    if (context_document.in_quirks_mode())
        temp_document->set_quirks_mode(DOM::QuirksMode::Yes);
    else if (context_document.in_limited_quirks_mode())
        temp_document->set_quirks_mode(DOM::QuirksMode::Limited);

    // 6. If allowDeclarativeShadowRoots is true, then set document's allow declarative shadow roots to true.
    if (allow_declarative_shadow_roots == AllowDeclarativeShadowRoots::Yes)
        temp_document->set_allow_declarative_shadow_roots(true);

    // 8. If contextDocument's scripting is disabled, then set scriptingMode to Disabled.
    if (context_document.is_scripting_disabled())
        scripting_mode = HTML::ParserScriptingMode::Disabled;

    // 7. Create a new HTML parser, and associate it with document.
    // 9. Set the parser's scripting mode to scriptingMode.
    auto parser = HTMLParser::create(*temp_document, markup, scripting_mode, "utf-8"sv);
    parser->m_context_element = context_element;
    parser->m_parsing_fragment = true;

    // 10. Set the state of the HTML parser's tokenization stage as follows, switching on the context element:
    //     An empty result means "leave the tokenizer in the data state".
    auto initial_tokenizer_state = [&]() -> Optional<HTMLTokenizer::State> {
        // Only HTML-namespace context elements select a non-default state.
        if (!(context_element.namespace_uri() == Namespace::HTML))
            return {};

        auto const& context_tag = context_element.local_name();

        // - title
        // - textarea
        //   Switch the tokenizer to the RCDATA state.
        if (context_tag.is_one_of(HTML::TagNames::title, HTML::TagNames::textarea))
            return HTMLTokenizer::State::RCDATA;

        // - style
        // - xmp
        // - iframe
        // - noembed
        // - noframes
        //   Switch the tokenizer to the RAWTEXT state.
        if (context_tag.is_one_of(HTML::TagNames::style, HTML::TagNames::xmp, HTML::TagNames::iframe, HTML::TagNames::noembed, HTML::TagNames::noframes))
            return HTMLTokenizer::State::RAWTEXT;

        // - script
        //   Switch the tokenizer to the script data state.
        if (context_tag == HTML::TagNames::script)
            return HTMLTokenizer::State::ScriptData;

        // - noscript
        //   If scripting mode is not Disabled, switch the tokenizer to the RAWTEXT state. Otherwise, leave the
        //   tokenizer in the data state.
        if (context_tag == HTML::TagNames::noscript) {
            if (scripting_mode != HTML::ParserScriptingMode::Disabled)
                return HTMLTokenizer::State::RAWTEXT;
            return {};
        }

        // - plaintext
        //   Switch the tokenizer to the PLAINTEXT state.
        if (context_tag == HTML::TagNames::plaintext)
            return HTMLTokenizer::State::PLAINTEXT;

        // Any other element: leave the tokenizer in the data state.
        return {};
    }();
    if (initial_tokenizer_state.has_value())
        parser->m_tokenizer.switch_to(initial_tokenizer_state.value());

    // 11. Let root be the result of creating an element given document, "html", the HTML namespace, null, null, false,
    //    and context's custom element registry.
    // NOTE(review): root is created against the context element's document and only then appended to the temporary
    //               document, whereas the spec text above says "document" — confirm this is intentional.
    auto root = MUST(create_element(context_document, HTML::TagNames::html, Namespace::HTML, {}, {}, false, context_element.custom_element_registry()));

    // 12. Append root to document.
    MUST(temp_document->append_child(root));

    // 17. Set the HTML parser's form element pointer to the nearest node to context that is a form element
    //     (going straight up the ancestor chain, and including the element itself, if it is a form element), if any.
    //     (If there is no such form element, the form element pointer keeps its initial value, null.)
    if (auto* context_as_form = as_if<HTMLFormElement>(context_element))
        parser->m_form_element = context_as_form;
    else
        parser->m_form_element = context_element.first_ancestor_of_type<HTMLFormElement>();

    // Marshal the context element (name, namespace, attributes) into the plain pointer/length form taken by the
    // Rust parser FFI. The views below borrow from the context element and its attributes.
    auto ffi_bytes = [](StringView view) {
        return reinterpret_cast<u8 const*>(view.characters_without_null_termination());
    };

    auto context_local_name = context_element.local_name().bytes_as_string_view();
    auto context_namespace = context_element.namespace_uri();
    auto context_namespace_ffi = namespace_to_html_parser_ffi(context_namespace);

    // The namespace URI string is only passed along for namespaces the FFI enum reports as Other.
    StringView context_namespace_uri;
    if (context_namespace_ffi == RustFfiHtmlNamespace::Other && context_namespace.has_value())
        context_namespace_uri = context_namespace->bytes_as_string_view();

    Vector<RustFfiHtmlParserAttribute> context_attributes;
    if (auto attributes = context_element.attributes()) {
        auto const attribute_count = attributes->length();
        context_attributes.ensure_capacity(attribute_count);
        for (size_t index = 0; index < attribute_count; ++index) {
            auto const* attribute = attributes->item(index);
            auto attribute_name = attribute->local_name().bytes_as_string_view();
            auto attribute_value = attribute->value().bytes_as_string_view();
            auto attribute_prefix = attribute->prefix().map([](auto const& prefix) { return prefix.bytes_as_string_view(); });
            context_attributes.unchecked_append({
                ffi_bytes(attribute_name),
                attribute_name.length(),
                attribute_prefix.has_value() ? ffi_bytes(*attribute_prefix) : nullptr,
                attribute_prefix.has_value() ? attribute_prefix->length() : 0,
                attribute_namespace_to_html_parser_ffi(attribute->namespace_uri()),
                ffi_bytes(attribute_value),
                attribute_value.length(),
            });
        }
    }

    // A missing form element pointer is passed as 0.
    auto const form_element_handle = parser->m_form_element ? reinterpret_cast<size_t>(parser->m_form_element.ptr()) : 0;
    rust_html_parser_begin_fragment(
        parser->m_rust_parser,
        reinterpret_cast<size_t>(root.ptr()),
        reinterpret_cast<size_t>(&context_element),
        context_namespace_ffi,
        ffi_bytes(context_namespace_uri),
        context_namespace_uri.length(),
        ffi_bytes(context_local_name),
        context_local_name.length(),
        context_attributes.data(),
        context_attributes.size(),
        quirks_mode_to_html_parser_ffi(temp_document->mode()),
        form_element_handle);

    // 18. Place the input into the input stream for the HTML parser just created. The encoding confidence is irrelevant.
    // 19. Start the HTML parser and let it run until it has consumed all the characters just inserted into the input stream.
    parser->run(context_element.document().url());

    // 20. Return root's children, in tree order.
    //     Each child is detached from root and adopted into the context element's document before being returned.
    Vector<GC::Root<DOM::Node>> children;
    for (GC::Ptr<DOM::Node> child = root->first_child(); child; child = root->first_child()) {
        MUST(root->remove_child(*child));
        context_element.document().adopt_node(*child);
        children.append(GC::make_root(*child));
    }
    return children;
}

// Creates a parser flagged as script-created for `document`. The scripting mode is Normal when the document has
// scripting enabled and Disabled otherwise.
GC::Ref<HTMLParser> HTMLParser::create_for_scripting(DOM::Document& document)
{
    auto scripting_mode = ParserScriptingMode::Disabled;
    if (document.is_scripting_enabled())
        scripting_mode = ParserScriptingMode::Normal;

    auto& realm = document.realm();
    return realm.create<HTMLParser>(document, scripting_mode, ScriptCreatedParser::Yes);
}

// Creates a parser for `document` without any initial input and not flagged as script-created. The scripting mode
// is Normal when the document has scripting enabled and Disabled otherwise.
GC::Ref<HTMLParser> HTMLParser::create_with_open_input_stream(DOM::Document& document)
{
    auto scripting_mode = ParserScriptingMode::Disabled;
    if (document.is_scripting_enabled())
        scripting_mode = ParserScriptingMode::Normal;

    auto& realm = document.realm();
    return realm.create<HTMLParser>(document, scripting_mode, ScriptCreatedParser::No);
}

// Creates a parser over raw bytes whose encoding is not known up front. If the document already has an encoding it
// is used as-is; otherwise the encoding sniffing algorithm picks one from the input and the optional MIME type.
GC::Ref<HTMLParser> HTMLParser::create_with_uncertain_encoding(DOM::Document& document, ByteBuffer const& input, Optional<MimeSniff::MimeType> maybe_mime_type)
{
    auto scripting_mode = ParserScriptingMode::Disabled;
    if (document.is_scripting_enabled())
        scripting_mode = ParserScriptingMode::Normal;

    if (!document.has_encoding()) {
        auto encoding = run_encoding_sniffing_algorithm(document, input, maybe_mime_type);
        dbgln_if(HTML_PARSER_DEBUG, "The encoding sniffing algorithm returned encoding '{}'", encoding);
        return document.realm().create<HTMLParser>(document, scripting_mode, input, encoding);
    }

    return document.realm().create<HTMLParser>(document, scripting_mode, input, document.encoding().value().to_byte_string());
}

// Creates a parser for `document` over already-decoded `input`, with an explicit scripting mode and encoding name.
// The parser is allocated in the document's realm.
GC::Ref<HTMLParser> HTMLParser::create(DOM::Document& document, StringView input, ParserScriptingMode scripting_mode, StringView encoding)
{
    auto& realm = document.realm();
    return realm.create<HTMLParser>(document, scripting_mode, input, encoding);
}

// Selects whether escape_string() runs in the spec's "attribute mode".
enum class AttributeMode {
    // Not in attribute mode: U+0022 QUOTATION MARK is left unescaped.
    No,
    // Attribute mode: U+0022 QUOTATION MARK is additionally replaced by "&quot;".
    Yes,
};

// https://html.spec.whatwg.org/multipage/parsing.html#escapingString
// Returns a copy of `string` with the characters that are significant to HTML serialization replaced by character
// references. Quotation marks are only replaced when `attribute_mode` is AttributeMode::Yes.
template<OneOf<Utf8View, Utf16View> ViewType>
static String escape_string(ViewType const& string, AttributeMode attribute_mode)
{
    StringBuilder escaped;
    for (auto code_point : string) {
        switch (code_point) {
        case '&':
            // 1. Replace any occurrence of the "&" character by the string "&amp;".
            escaped.append("&amp;"sv);
            break;
        case 0xA0:
            // 2. Replace any occurrences of the U+00A0 NO-BREAK SPACE character by the string "&nbsp;".
            escaped.append("&nbsp;"sv);
            break;
        case '<':
            // 3. Replace any occurrences of the "<" character by the string "&lt;".
            escaped.append("&lt;"sv);
            break;
        case '>':
            // 4. Replace any occurrences of the ">" character by the string "&gt;".
            escaped.append("&gt;"sv);
            break;
        case '"':
            // 5. If the algorithm was invoked in the attribute mode, then replace any occurrences of the """ character by the string "&quot;".
            if (attribute_mode == AttributeMode::Yes) {
                escaped.append("&quot;"sv);
                break;
            }
            // Outside attribute mode a quotation mark is copied through like any other code point.
            [[fallthrough]];
        default:
            escaped.append_code_point(code_point);
            break;
        }
    }
    return escaped.to_string_without_validation();
}

// https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-serialisation-algorithm
String HTMLParser::serialize_html_fragment(DOM::Node const& node, SerializableShadowRoots serializable_shadow_roots, Vector<GC::Root<DOM::ShadowRoot>> const& shadow_roots, DOM::FragmentSerializationMode fragment_serialization_mode)
{
    // NOTE: Steps in this function are jumbled a bit to accommodate the Element.outerHTML API.
    //       When called with FragmentSerializationMode::Outer, we will serialize the element itself,
    //       not just its children.

    // 2. Let s be a string, and initialize it to the empty string.
    StringBuilder builder;

    auto serialize_element = [&](DOM::Element const& element) {
        // If current node is an element in the HTML namespace, the MathML namespace, or the SVG namespace, then let tagname be current node's local name.
        // Otherwise, let tagname be current node's qualified name.
        FlyString tag_name;

        if (element.namespace_uri().has_value() && element.namespace_uri()->is_one_of(Namespace::HTML, Namespace::MathML, Namespace::SVG))
            tag_name = element.local_name();
        else
            tag_name = element.qualified_name();

        // Append a U+003C LESS-THAN SIGN character (<), followed by tagname.
        builder.append('<');
        builder.append(tag_name);

        // If current node's is value is not null, and the element does not have an is attribute in its attribute list,
        // then append the string " is="",
        // followed by current node's is value escaped as described below in attribute mode,
        // followed by a U+0022 QUOTATION MARK character (").
        if (element.is_value().has_value() && !element.has_attribute(AttributeNames::is)) {
            builder.append(" is=\""sv);
            builder.append(escape_string(element.is_value().value().code_points(), AttributeMode::Yes));
            builder.append('"');
        }

        // For each attribute that the element has,
        // append a U+0020 SPACE character,
        // the attribute's serialized name as described below,
        // a U+003D EQUALS SIGN character (=),
        // a U+0022 QUOTATION MARK character ("),
        // the attribute's value, escaped as described below in attribute mode,
        // and a second U+0022 QUOTATION MARK character (").
        element.for_each_attribute([&](auto const& attribute) {
            builder.append(' ');

            // An attribute's serialized name for the purposes of the previous paragraph must be determined as follows:
            // -> If the attribute has no namespace:
            if (!attribute.namespace_uri().has_value()) {
                // The attribute's serialized name is the attribute's local name.
                builder.append(attribute.local_name());
            }
            // -> If the attribute is in the XML namespace:
            else if (attribute.namespace_uri() == Namespace::XML) {
                // The attribute's serialized name is the string "xml:" followed by the attribute's local name.
                builder.append("xml:"sv);
                builder.append(attribute.local_name());
            }
            // -> If the attribute is in the XMLNS namespace and the attribute's local name is xmlns:
            else if (attribute.namespace_uri() == Namespace::XMLNS && attribute.local_name() == "xmlns") {
                // The attribute's serialized name is the string "xmlns".
                builder.append("xmlns"sv);
            }
            // -> If the attribute is in the XMLNS namespace and the attribute's local name is not xmlns:
            else if (attribute.namespace_uri() == Namespace::XMLNS) {
                // The attribute's serialized name is the string "xmlns:" followed by the attribute's local name.
                builder.append("xmlns:"sv);
                builder.append(attribute.local_name());
            }
            // -> If the attribute is in the XLink namespace:
            else if (attribute.namespace_uri() == Namespace::XLink) {
                // The attribute's serialized name is the string "xlink:" followed by the attribute's local name.
                builder.append("xlink:"sv);
                builder.append(attribute.local_name());
            }
            // -> If the attribute is in some other namespace:
            else {
                // The attribute's serialized name is the attribute's qualified name.
                builder.append(attribute.name());
            }

            builder.append("=\""sv);
            builder.append(escape_string(attribute.value().code_points(), AttributeMode::Yes));
            builder.append('"');
        });

        // Append a U+003E GREATER-THAN SIGN character (>).
        builder.append('>');

        // If current node serializes as void, then continue on to the next child node at this point.
        if (element.serializes_as_void())
            return IterationDecision::Continue;

        // Append the value of running the HTML fragment serialization algorithm with current node,
        // serializableShadowRoots, and shadowRoots (thus recursing into this algorithm for that node),
        // followed by a U+003C LESS-THAN SIGN character (<),
        // a U+002F SOLIDUS character (/),
        // tagname again,
        // and finally a U+003E GREATER-THAN SIGN character (>).
        builder.append(serialize_html_fragment(element, serializable_shadow_roots, shadow_roots));
        builder.append("</"sv);
        builder.append(tag_name);
        builder.append('>');

        return IterationDecision::Continue;
    };

    if (fragment_serialization_mode == DOM::FragmentSerializationMode::Outer) {
        serialize_element(as<DOM::Element>(node));
        return builder.to_string_without_validation();
    }

    // The algorithm takes as input a DOM Element, Document, or DocumentFragment referred to as the node.
    VERIFY(node.is_element() || node.is_document() || node.is_document_fragment());
    GC::Ref<DOM::Node const> actual_node = node;

    if (is<DOM::Element>(node)) {
        auto const& element = as<DOM::Element>(node);

        // 1. If the node serializes as void, then return the empty string.
        //    (NOTE: serializes as void is defined only on elements in the spec)
        if (element.serializes_as_void())
            return String {};

        // 3. If the node is a template element, then let the node instead be the template element's template contents (a DocumentFragment node).
        //    (NOTE: This is out of order of the spec to avoid another dynamic cast. The second step just creates a string builder, so it shouldn't matter)
        if (is<HTML::HTMLTemplateElement>(element))
            actual_node = as<HTML::HTMLTemplateElement>(element).content();

        // 4. If current node is a shadow host, then:
        if (element.is_shadow_host()) {
            // 1. Let shadow be current node's shadow root.
            auto shadow = element.shadow_root();

            // 2. If one of the following is true:
            //    - serializableShadowRoots is true and shadow's serializable is true; or
            //    - shadowRoots contains shadow,
            if ((serializable_shadow_roots == SerializableShadowRoots::Yes && shadow->serializable())
                || shadow_roots.contains([&](auto& entry) { return entry == shadow; })) {
                // then:
                // 1. Append "<template shadowrootmode="".
                builder.append("<template shadowrootmode=\""sv);

                // 2. If shadow's mode is "open", then append "open". Otherwise, append "closed".
                builder.append(shadow->mode() == Bindings::ShadowRootMode::Open ? "open"sv : "closed"sv);

                // 3. Append """.
                builder.append('"');

                // 4. If shadow's delegates focus is set, then append " shadowrootdelegatesfocus=""".
                if (shadow->delegates_focus())
                    builder.append(" shadowrootdelegatesfocus=\"\""sv);

                // 5. If shadow's serializable is set, then append " shadowrootserializable=""".
                if (shadow->serializable())
                    builder.append(" shadowrootserializable=\"\""sv);

                // 6. If shadow's slot assignment is "manual", then append " shadowrootslotassignment="manual"".
                if (shadow->slot_assignment() == Bindings::SlotAssignmentMode::Manual)
                    builder.append(" shadowrootslotassignment=\"manual\""sv);

                // 7. If shadow's clonable is set, then append " shadowrootclonable=""".
                if (shadow->clonable())
                    builder.append(" shadowrootclonable=\"\""sv);

                // 7. Let shouldAppendRegistryAttribute be the result of running these steps:
                auto should_append_registry_attribute = [&] {
                    // 1. Let documentRegistry be shadow's node document's custom element registry.
                    auto document_registry = shadow->document().custom_element_registry();

                    // 2. Let shadowRegistry be shadow's custom element registry.
                    auto shadow_registry = shadow->custom_element_registry();

                    // 3. If documentRegistry is null and shadowRegistry is null, then return false.
                    if (!document_registry && !shadow_registry)
                        return false;

                    // 4. If documentRegistry is a global custom element registry and shadowRegistry is a global custom
                    //    element registry, then return false.
                    if (is_a_global_custom_element_registry(document_registry) && is_a_global_custom_element_registry(shadow_registry))
                        return false;

                    // 5. Return true.
                    return true;
                }();

                // 8. If shouldAppendRegistryAttribute is true, then append " shadowrootcustomelementregistry=""".
                if (should_append_registry_attribute)
                    builder.append(" shadowrootcustomelementregistry=\"\""sv);

                // 9. Append ">".
                builder.append('>');

                // 10. Append the value of running the HTML fragment serialization algorithm with shadow,
                //    serializableShadowRoots, and shadowRoots (thus recursing into this algorithm for that element).
                builder.append(serialize_html_fragment(*shadow, serializable_shadow_roots, shadow_roots));

                // 11. Append "</template>".
                builder.append("</template>"sv);
            }
        }
    }

    // 5. For each child node of the node, in tree order, run the following steps:
    actual_node->for_each_child([&](DOM::Node& current_node) {
        // 1. Let current node be the child node being processed.

        // 2. Append the appropriate string from the following list to s:

        if (is<DOM::Element>(current_node)) {
            // -> If current node is an Element
            auto& element = as<DOM::Element>(current_node);
            serialize_element(element);
            return IterationDecision::Continue;
        }

        if (is<DOM::Text>(current_node)) {
            // -> If current node is a Text node
            auto& text_node = as<DOM::Text>(current_node);
            auto* parent = current_node.parent();

            if (is<DOM::Element>(parent)) {
                auto& parent_element = as<DOM::Element>(*parent);

                // If the parent of current node is a style, script, xmp, iframe, noembed, noframes, or plaintext element,
                // or if the parent of current node is a noscript element and scripting is enabled for the node, then append the value of current node's data IDL attribute literally.
                if (parent_element.local_name().is_one_of(HTML::TagNames::style, HTML::TagNames::script, HTML::TagNames::xmp, HTML::TagNames::iframe, HTML::TagNames::noembed, HTML::TagNames::noframes, HTML::TagNames::plaintext)
                    || (parent_element.local_name() == HTML::TagNames::noscript && !parent_element.is_scripting_disabled())) {
                    builder.append(text_node.data());
                    return IterationDecision::Continue;
                }
            }

            // Otherwise, append the value of current node's data IDL attribute, escaped as described below.
            builder.append(escape_string(text_node.data().utf16_view(), AttributeMode::No));
        }

        if (is<DOM::Comment>(current_node)) {
            // -> If current node is a Comment
            auto& comment_node = as<DOM::Comment>(current_node);

            // Append the literal string "<!--" (U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS, U+002D HYPHEN-MINUS),
            // followed by the value of current node's data IDL attribute, followed by the literal string "-->" (U+002D HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN).
            builder.append("<!--"sv);
            builder.append(comment_node.data());
            builder.append("-->"sv);
            return IterationDecision::Continue;
        }

        if (is<DOM::ProcessingInstruction>(current_node)) {
            // -> If current node is a ProcessingInstruction
            auto& processing_instruction_node = as<DOM::ProcessingInstruction>(current_node);

            // Append the literal string "<?" (U+003C LESS-THAN SIGN, U+003F QUESTION MARK), followed by the value of current node's target IDL attribute,
            // followed by a single U+0020 SPACE character, followed by the value of current node's data IDL attribute, followed by a single U+003E GREATER-THAN SIGN character (>).
            builder.append("<?"sv);
            builder.append(processing_instruction_node.target());
            builder.append(' ');
            builder.append(processing_instruction_node.data());
            builder.append('>');
            return IterationDecision::Continue;
        }

        if (is<DOM::DocumentType>(current_node)) {
            // -> If current node is a DocumentType
            auto& document_type_node = as<DOM::DocumentType>(current_node);

            // Append the literal string "<!DOCTYPE" (U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+0044 LATIN CAPITAL LETTER D, U+004F LATIN CAPITAL LETTER O,
            // U+0043 LATIN CAPITAL LETTER C, U+0054 LATIN CAPITAL LETTER T, U+0059 LATIN CAPITAL LETTER Y, U+0050 LATIN CAPITAL LETTER P, U+0045 LATIN CAPITAL LETTER E),
            // followed by a space (U+0020 SPACE), followed by the value of current node's name IDL attribute, followed by the literal string ">" (U+003E GREATER-THAN SIGN).
            builder.append("<!DOCTYPE "sv);
            builder.append(document_type_node.name());
            builder.append('>');
            return IterationDecision::Continue;
        }

        return IterationDecision::Continue;
    });

    // 6. Return s.
    return MUST(builder.to_string());
}

// https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#current-dimension-value
// Turns an already-parsed numeric value into a style value: a percentage when position points at a U+0025 (%)
// code point, and a pixel length in every other case (including when position is at the end of input).
static RefPtr<CSS::StyleValue const> parse_current_dimension_value(float value, Utf8View input, Utf8View::Iterator position)
{
    // 1. If position is past the end of input, then return value as a length.
    // 2. If the code point at position within input is U+0025 (%), then return value as a percentage.
    // NOTE: The end-of-input test comes first so that position is only dereferenced while it is valid.
    bool const is_percentage = position != input.end() && *position == '%';
    if (is_percentage)
        return CSS::PercentageStyleValue::create(CSS::Percentage(value));

    // 3. Return value as a length.
    // NOTE: Steps 1 and 3 produce the same result, so they share this return.
    return CSS::LengthStyleValue::create(CSS::Length::make_px(CSSPixels::nearest_value_for(value)));
}

// https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#rules-for-parsing-dimension-values
// Parses a dimension attribute value into either a pixel length or a percentage style value.
// Returns nullptr when the string is not valid UTF-8 or does not start (after ASCII whitespace) with an ASCII digit.
RefPtr<CSS::StyleValue const> parse_dimension_value(StringView string)
{
    // 1. Let input be the string being parsed.
    Utf8View input { string };
    if (!input.validate())
        return nullptr;

    // 2. Let position be a position variable for input, initially pointing at the start of input.
    auto position = input.begin();

    // Small cursor helpers; both observe the current value of position.
    auto at_end = [&] { return position == input.end(); };
    auto at_ascii_digit = [&] { return !at_end() && is_ascii_digit(*position); };

    // 3. Skip ASCII whitespace within input given position.
    while (!at_end() && Infra::is_ascii_whitespace(*position))
        ++position;

    // 4. If position is past the end of input or the code point at position within input is not an ASCII digit,
    //    then return failure.
    if (!at_ascii_digit())
        return nullptr;

    // 5. Collect a sequence of code points that are ASCII digits from input given position,
    //    and interpret the resulting sequence as a base-ten integer. Let value be that number.
    // NOTE: Step 4 guarantees at least one digit, so the first iteration always runs on a digit.
    StringBuilder digits;
    do {
        digits.append(*position);
        ++position;
    } while (at_ascii_digit());
    auto parsed_integer = digits.string_view().to_number<double>();

    // The integer part is capped at the largest dimension CSSPixels supports.
    float value = min(*parsed_integer, CSSPixels::max_dimension_value);

    // 6. If position is past the end of input, then return value as a length.
    if (at_end())
        return CSS::LengthStyleValue::create(CSS::Length::make_px(CSSPixels(value)));

    // 7. If the code point at position within input is U+002E (.), then:
    if (*position == '.') {
        // 1. Advance position by 1.
        ++position;

        // 2. If position is past the end of input or the code point at position within input is not an ASCII digit,
        //    then return the current dimension value with value, input, and position.
        if (!at_ascii_digit())
            return parse_current_dimension_value(value, input, position);

        // 3. Let divisor have the value 1.
        float divisor = 1;

        // 4. While true:
        // NOTE: Written as a do-while; the loop ends either at the end of input (sub-step 4) or at the first
        //       non-digit code point (sub-step 5).
        do {
            // 1. Multiply divisor by ten.
            divisor *= 10;

            // 2. Add the value of the code point at position within input,
            //    interpreted as a base-ten digit (0..9) and divided by divisor, to value.
            value += (*position - '0') / divisor;

            // 3. Advance position by 1.
            ++position;

            // 5. If the code point at position within input is not an ASCII digit, then break.
        } while (at_ascii_digit());

        // 4. If position is past the end of input, then return value as a length.
        if (at_end())
            return CSS::LengthStyleValue::create(CSS::Length::make_px(CSSPixels::nearest_value_for(value)));
    }

    // 8. Return the current dimension value with value, input, and position.
    return parse_current_dimension_value(value, input, position);
}

// https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#rules-for-parsing-non-zero-dimension-values
// Same as parse_dimension_value(), except that a result of zero (as a length or as a percentage) is also an error.
// Errors are reported by returning nullptr.
RefPtr<CSS::StyleValue const> parse_nonzero_dimension_value(StringView string)
{
    // 1. Let input be the string being parsed.
    // 2. Let value be the result of parsing input using the rules for parsing dimension values.
    auto dimension = parse_dimension_value(string);

    // 3. If value is an error, return an error.
    if (!dimension)
        return nullptr;

    // 4. If value is zero, return an error.
    bool const is_zero_length = dimension->is_length() && dimension->as_length().raw_value() == 0;
    bool const is_zero_percentage = dimension->is_percentage() && dimension->as_percentage().percentage().value() == 0;
    if (is_zero_length || is_zero_percentage)
        return nullptr;

    // 5. If value is a percentage, return value as a percentage.
    // 6. Return value as a length.
    // NOTE: The parsed style value already carries its kind, so both steps are a plain return.
    return dimension;
}

// https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#rules-for-parsing-a-legacy-colour-value
// Parses string_view using the HTML rules for parsing a legacy color value.
// Returns an empty Optional when the input is the empty string or matches "transparent"; every other input
// yields a color (named colors are returned as-is, everything else is forced through the lenient hex rules).
Optional<Color> parse_legacy_color_value(StringView string_view)
{
    // 1. If input is the empty string, then return failure.
    if (string_view.is_empty())
        return {};

    ByteString input = string_view;

    // 2. Strip leading and trailing ASCII whitespace from input.
    // NOTE: This can leave input empty (e.g. for " "); the remaining steps must cope with that.
    input = input.trim(Infra::ASCII_WHITESPACE);

    // 3. If input is an ASCII case-insensitive match for "transparent", then return failure.
    if (input.equals_ignoring_ascii_case("transparent"sv))
        return {};

    // 4. If input is an ASCII case-insensitive match for one of the named colors, then return the CSS color corresponding to that keyword. [CSSCOLOR]
    if (auto const color = Color::from_named_css_color_string(input); color.has_value())
        return color;

    // Converts a single ASCII hex digit to its numeric value. Callers only pass characters already known to be
    // ASCII hex digits.
    auto hex_nibble_to_u8 = [](char nibble) -> u8 {
        if (nibble >= '0' && nibble <= '9')
            return nibble - '0';
        if (nibble >= 'a' && nibble <= 'f')
            return nibble - 'a' + 10;
        return nibble - 'A' + 10;
    };

    // 5. If input's code point length is four, and the first character in input is U+0023 (#), and the last three characters of input are all ASCII hex digits, then:
    // NOTE: When all four bytes are ASCII, the byte length equals the code point length.
    if (input.length() == 4 && input[0] == '#' && is_ascii_hex_digit(input[1]) && is_ascii_hex_digit(input[2]) && is_ascii_hex_digit(input[3])) {
        // 1. Let result be a CSS color.
        Color result;
        result.set_alpha(0xFF);

        // 2. Interpret the second character of input as a hexadecimal digit; let the red component of result be the resulting number multiplied by 17.
        result.set_red(hex_nibble_to_u8(input[1]) * 17);

        // 3. Interpret the third character of input as a hexadecimal digit; let the green component of result be the resulting number multiplied by 17.
        result.set_green(hex_nibble_to_u8(input[2]) * 17);

        // 4. Interpret the fourth character of input as a hexadecimal digit; let the blue component of result be the resulting number multiplied by 17.
        result.set_blue(hex_nibble_to_u8(input[3]) * 17);

        // 5. Return result.
        return result;
    }

    // 6. Replace any code points greater than U+FFFF in input (i.e., any characters that are not in the basic multilingual plane) with "00".
    auto replace_non_basic_multilingual_code_points = [](StringView string) -> ByteString {
        StringBuilder builder;
        for (auto code_point : Utf8View { string }) {
            if (code_point > 0xFFFF)
                builder.append("00"sv);
            else
                builder.append_code_point(code_point);
        }
        return builder.to_byte_string();
    };
    input = replace_non_basic_multilingual_code_points(input);

    // 7. If input's code point length is greater than 128, truncate input, leaving only the first 128 characters.
    // NOTE: The limit is counted in code points, not bytes. Cutting at a byte offset could split a multi-byte
    //       sequence and would keep fewer characters than the spec asks for.
    auto truncate_to_code_points = [](StringView string, size_t max_code_points) -> ByteString {
        StringBuilder builder;
        size_t code_point_count = 0;
        for (auto code_point : Utf8View { string }) {
            if (code_point_count == max_code_points)
                break;
            builder.append_code_point(code_point);
            ++code_point_count;
        }
        return builder.to_byte_string();
    };
    // A string can only have more than 128 code points if it has more than 128 bytes, so the byte length serves
    // as a cheap pre-check; the helper leaves shorter strings unchanged.
    if (input.length() > 128)
        input = truncate_to_code_points(input, 128);

    // 8. If the first character in input is U+0023 (#), then remove it.
    // NOTE: input may be empty here (whitespace-only attribute value), in which case there is no first character.
    if (!input.is_empty() && input[0] == '#')
        input = input.substring(1);

    // 9. Replace any character in input that is not an ASCII hex digit with U+0030 (0).
    auto replace_non_ascii_hex = [](StringView string) -> ByteString {
        StringBuilder builder;
        for (auto code_point : Utf8View { string }) {
            if (is_ascii_hex_digit(code_point))
                builder.append_code_point(code_point);
            else
                builder.append_code_point('0');
        }
        return builder.to_byte_string();
    };
    input = replace_non_ascii_hex(input);

    // From here on input consists solely of ASCII hex digits, so byte offsets and code point offsets coincide.

    // 10. While input's code point length is zero or not a multiple of three, append U+0030 (0) to input.
    StringBuilder builder;
    builder.append(input);
    while (builder.length() == 0 || (builder.length() % 3 != 0))
        builder.append_code_point('0');
    input = builder.to_byte_string();

    // 11. Split input into three strings of equal code point length, to obtain three components. Let length be the code point length that all of those components have (one third the code point length of input).
    auto length = input.length() / 3;
    auto first_component = input.substring_view(0, length);
    auto second_component = input.substring_view(length, length);
    auto third_component = input.substring_view(length * 2, length);

    // 12. If length is greater than 8, then remove the leading length-8 characters in each component, and let length be 8.
    if (length > 8) {
        first_component = first_component.substring_view(length - 8);
        second_component = second_component.substring_view(length - 8);
        third_component = third_component.substring_view(length - 8);
        length = 8;
    }

    // 13. While length is greater than two and the first character in each component is U+0030 (0), remove that character and reduce length by one.
    while (length > 2 && first_component[0] == '0' && second_component[0] == '0' && third_component[0] == '0') {
        --length;
        first_component = first_component.substring_view(1);
        second_component = second_component.substring_view(1);
        third_component = third_component.substring_view(1);
    }

    // 14. If length is still greater than two, truncate each component, leaving only the first two characters in each.
    if (length > 2) {
        first_component = first_component.substring_view(0, 2);
        second_component = second_component.substring_view(0, 2);
        third_component = third_component.substring_view(0, 2);
    }

    // Interprets a component of one or two hex digits as a number. Only a length of exactly one selects the
    // single-digit path; any other length means the component holds two digits at this point.
    auto to_hex = [&](StringView string) -> u8 {
        if (length == 1) {
            return hex_nibble_to_u8(string[0]);
        }
        auto nib1 = hex_nibble_to_u8(string[0]);
        auto nib2 = hex_nibble_to_u8(string[1]);
        return nib1 << 4 | nib2;
    };

    // 15. Let result be a CSS color.
    Color result;
    result.set_alpha(0xFF);

    // 16. Interpret the first component as a hexadecimal number; let the red component of result be the resulting number.
    result.set_red(to_hex(first_component));

    // 17. Interpret the second component as a hexadecimal number; let the green component of result be the resulting number.
    result.set_green(to_hex(second_component));

    // 18. Interpret the third component as a hexadecimal number; let the blue component of result be the resulting number.
    result.set_blue(to_hex(third_component));

    // 19. Return result.
    return result;
}

// https://html.spec.whatwg.org/multipage/rendering.html#tables-2
// Maps the align attribute value of a thead, tbody, tfoot, tr, td, or th element to the keyword used for its
// 'text-align' presentational hint. Returns nullptr for values that are not recognized.
RefPtr<CSS::StyleValue const> parse_table_child_element_align_value(StringView string_view)
{
    // Every recognized value is compared as an ASCII case-insensitive match.
    auto value_is = [&](StringView keyword) {
        return string_view.equals_ignoring_ascii_case(keyword);
    };

    // "center" or "middle": the element is expected to center text within itself, as if 'text-align' were set to
    // 'center' in a presentational hint, and to align descendants to the center.
    if (value_is("center"sv) || value_is("middle"sv))
        return CSS::KeywordStyleValue::create(CSS::Keyword::LibwebCenter);

    // "left": the element is expected to left-align text within itself, as if 'text-align' were set to 'left' in a
    // presentational hint, and to align descendants to the left.
    if (value_is("left"sv))
        return CSS::KeywordStyleValue::create(CSS::Keyword::LibwebLeft);

    // "right": the element is expected to right-align text within itself, as if 'text-align' were set to 'right'
    // in a presentational hint, and to align descendants to the right.
    if (value_is("right"sv))
        return CSS::KeywordStyleValue::create(CSS::Keyword::LibwebRight);

    // "justify": the element is expected to full-justify text within itself, as if 'text-align' were set to
    // 'justify' in a presentational hint, and to align descendants to the left.
    if (value_is("justify"sv))
        return CSS::KeywordStyleValue::create(CSS::Keyword::Justify);

    return nullptr;
}

JS::Realm& HTMLParser::realm()
{
    // The parser hands out the realm of its document.
    auto& document = *m_document;
    return document.realm();
}

// https://html.spec.whatwg.org/multipage/parsing.html#start-the-speculative-html-parser
// Replaces any active speculative parser with a fresh one fed from the tokenizer's unparsed input, then runs it.
void HTMLParser::start_the_speculative_html_parser()
{
    // 1. Optionally, return.
    // NOTE: We do not opt out.

    // 2. If parser's active speculative HTML parser is not null, then stop the speculative HTML parser for parser.
    // NOTE: stop_the_speculative_html_parser() returns immediately when there is no active speculative parser,
    //       so it performs the null check for us.
    stop_the_speculative_html_parser();

    // 3. Let speculativeParser be a new speculative HTML parser, with the same state as parser.
    // 4. Let speculativeDoc be a new isomorphic representation of parser's Document, where all elements are instead
    //    speculative mock elements. Let speculativeParser parse into speculativeDoc.
    // NOTE: Speculative mock elements are produced on the fly during run(); we do not materialize a full speculativeDoc tree.
    auto new_speculative_parser = SpeculativeHTMLParser::create(realm(), *m_document, m_tokenizer.unparsed_input(), m_document->base_url());

    // 5. Set parser's active speculative HTML parser to speculativeParser.
    m_active_speculative_html_parser = new_speculative_parser;

    // 6. In parallel, run speculativeParser until it is stopped or until it reaches the end of its input stream.
    new_speculative_parser->run();
}

// https://html.spec.whatwg.org/multipage/parsing.html#stop-the-speculative-html-parser
// Stops and forgets the active speculative parser. Does nothing when there is none.
void HTMLParser::stop_the_speculative_html_parser()
{
    // 1. Let speculativeParser be parser's active speculative HTML parser.
    // 2. If speculativeParser is null, then return.
    if (auto active_parser = m_active_speculative_html_parser; active_parser) {
        // 3. Throw away any pending content in speculativeParser's input stream, and discard any future content that
        //    would have been added to it.
        active_parser->stop();

        // 4. Set parser's active speculative HTML parser to null.
        m_active_speculative_html_parser = nullptr;
    }
}

// https://html.spec.whatwg.org/multipage/parsing.html#abort-a-parser
// Aborts parsing: drops tokenizer input, stops speculative parsing, walks the document readiness through
// "interactive" to "complete" while emptying the stack of open elements, and finally marks this parser as aborted.
// The steps are order-sensitive and follow the spec algorithm one-to-one.
void HTMLParser::abort()
{
    // 1. Throw away any pending content in the input stream, and discard any future content that would have been added to it.
    m_tokenizer.abort();

    // 2. Stop the speculative HTML parser for this HTML parser.
    stop_the_speculative_html_parser();

    // 3. Update the current document readiness to "interactive".
    m_document->update_readiness(DocumentReadyState::Interactive);

    // 4. Pop all the nodes off the stack of open elements.
    pop_all_open_elements();

    // 5. Update the current document readiness to "complete".
    m_document->update_readiness(DocumentReadyState::Complete);

    // Not a spec step: remember that this parser has been aborted.
    m_aborted = true;
}

// Wraps a (pointer, length) pair received over the FFI boundary in a StringView.
// A null pointer or a zero length both produce a default-constructed view.
static StringView html_parser_ffi_string_view(u8 const* ptr, size_t len)
{
    bool const has_bytes = ptr != nullptr && len != 0;
    if (!has_bytes)
        return {};
    return StringView { ptr, len };
}

// Builds a FlyString from bytes received over the FFI boundary.
// The bytes are required to be valid UTF-8; MUST() aborts otherwise.
static FlyString fly_string_from_html_parser_ffi(u8 const* ptr, size_t len)
{
    auto const bytes = html_parser_ffi_string_view(ptr, len);
    return MUST(FlyString::from_utf8(bytes));
}

// Builds a String from bytes received over the FFI boundary.
// The bytes are required to be valid UTF-8; MUST() aborts otherwise.
static String string_from_html_parser_ffi(u8 const* ptr, size_t len)
{
    auto const bytes = html_parser_ffi_string_view(ptr, len);
    return MUST(String::from_utf8(bytes));
}

// FFI callback: logs a parse error message when HTML_PARSER_DEBUG is enabled.
extern "C" void ladybird_html_parser_log_parse_error(void* parser, u8 const* message_ptr, size_t message_len)
{
    // The parser handle is only checked for validity here; its value is otherwise unused.
    static_cast<void>(parser_from_html_parser_ffi(parser));

    dbgln_if(HTML_PARSER_DEBUG, "Rust parser parse error: {}", html_parser_ffi_string_view(message_ptr, message_len));
}

// FFI callback: forwards a "stop parsing" request to the HTMLParser behind the handle.
extern "C" void ladybird_html_parser_stop_parsing(void* parser)
{
    auto& html_parser = parser_from_html_parser_ffi(parser);
    html_parser.stop_parsing_from_rust_parser();
}

// FFI callback: reports whether parse errors are enabled, which is the case exactly when HTML_PARSER_DEBUG is set.
extern "C" bool ladybird_html_parser_parse_errors_enabled()
{
    constexpr bool parse_errors_enabled = HTML_PARSER_DEBUG;
    return parse_errors_enabled;
}

// FFI callback: visits the DOM node behind a node handle with the given GC visitor.
// A handle of zero is skipped.
extern "C" void ladybird_html_parser_visit_node(void* visitor, size_t node)
{
    if (node != 0) {
        auto& gc_visitor = *static_cast<GC::Cell::Visitor*>(visitor);
        gc_visitor.visit(node_from_html_parser_ffi(node));
    }
}

// Converts an FFI element namespace into a namespace URI.
// The well-known namespaces map to their constants; for Other, the URI bytes passed alongside are used, and a
// zero-length URI yields an empty Optional.
static Optional<FlyString> namespace_from_html_parser_ffi(RustFfiHtmlNamespace namespace_, u8 const* namespace_uri_ptr, size_t namespace_uri_len)
{
    switch (namespace_) {
    case RustFfiHtmlNamespace::Html:
        return Namespace::HTML;
    case RustFfiHtmlNamespace::MathMl:
        return Namespace::MathML;
    case RustFfiHtmlNamespace::Svg:
        return Namespace::SVG;
    case RustFfiHtmlNamespace::Other: {
        bool const has_uri = namespace_uri_len != 0;
        if (has_uri)
            return fly_string_from_html_parser_ffi(namespace_uri_ptr, namespace_uri_len);
        return {};
    }
    }
    VERIFY_NOT_REACHED();
}

// Converts an FFI attribute namespace into a namespace URI. None maps to an empty Optional.
// Other is not accepted here and aborts.
static Optional<FlyString> attribute_namespace_from_html_parser_ffi(RustFfiHtmlAttributeNamespace namespace_)
{
    switch (namespace_) {
    case RustFfiHtmlAttributeNamespace::Other:
        // This sentinel is only used for fragment context attributes. Attributes created by the parser never
        // reach this function with an arbitrary namespace URI.
        VERIFY_NOT_REACHED();
    case RustFfiHtmlAttributeNamespace::Xmlns:
        return Namespace::XMLNS;
    case RustFfiHtmlAttributeNamespace::Xml:
        return Namespace::XML;
    case RustFfiHtmlAttributeNamespace::XLink:
        return Namespace::XLink;
    case RustFfiHtmlAttributeNamespace::None:
        return {};
    }
    VERIFY_NOT_REACHED();
}

// Converts an attribute namespace URI into its FFI representation.
// No namespace maps to None, the three well-known URIs map to their own values, and any other URI maps to Other.
static RustFfiHtmlAttributeNamespace attribute_namespace_to_html_parser_ffi(Optional<FlyString> const& namespace_)
{
    // An empty Optional cannot match any of the known URIs, so it is handled up front.
    if (!namespace_.has_value())
        return RustFfiHtmlAttributeNamespace::None;

    if (namespace_ == Namespace::XLink)
        return RustFfiHtmlAttributeNamespace::XLink;
    if (namespace_ == Namespace::XML)
        return RustFfiHtmlAttributeNamespace::Xml;
    if (namespace_ == Namespace::XMLNS)
        return RustFfiHtmlAttributeNamespace::Xmlns;

    return RustFfiHtmlAttributeNamespace::Other;
}

// Converts an element namespace URI into its FFI representation.
// The HTML, MathML, and SVG URIs map to their own values; everything else, including no namespace, maps to Other.
static RustFfiHtmlNamespace namespace_to_html_parser_ffi(Optional<FlyString> const& namespace_)
{
    if (namespace_.has_value()) {
        if (namespace_ == Namespace::HTML)
            return RustFfiHtmlNamespace::Html;
        if (namespace_ == Namespace::MathML)
            return RustFfiHtmlNamespace::MathMl;
        if (namespace_ == Namespace::SVG)
            return RustFfiHtmlNamespace::Svg;
    }

    return RustFfiHtmlNamespace::Other;
}

// Converts an FFI quirks mode into the DOM quirks mode of the same name.
static DOM::QuirksMode quirks_mode_from_html_parser_ffi(RustFfiHtmlQuirksMode mode)
{
    Optional<DOM::QuirksMode> converted;

    switch (mode) {
    case RustFfiHtmlQuirksMode::No:
        converted = DOM::QuirksMode::No;
        break;
    case RustFfiHtmlQuirksMode::Limited:
        converted = DOM::QuirksMode::Limited;
        break;
    case RustFfiHtmlQuirksMode::Yes:
        converted = DOM::QuirksMode::Yes;
        break;
    }

    // Only a value outside the enumeration leaves the result unset.
    if (!converted.has_value())
        VERIFY_NOT_REACHED();
    return converted.value();
}

// Converts a DOM quirks mode into the FFI quirks mode of the same name.
static RustFfiHtmlQuirksMode quirks_mode_to_html_parser_ffi(DOM::QuirksMode mode)
{
    Optional<RustFfiHtmlQuirksMode> converted;

    switch (mode) {
    case DOM::QuirksMode::No:
        converted = RustFfiHtmlQuirksMode::No;
        break;
    case DOM::QuirksMode::Limited:
        converted = RustFfiHtmlQuirksMode::Limited;
        break;
    case DOM::QuirksMode::Yes:
        converted = RustFfiHtmlQuirksMode::Yes;
        break;
    }

    // Only a value outside the enumeration leaves the result unset.
    if (!converted.has_value())
        VERIFY_NOT_REACHED();
    return converted.value();
}

// Recovers the HTMLParser behind an opaque FFI parser handle. The handle must not be null.
static HTMLParser& parser_from_html_parser_ffi(void* parser)
{
    VERIFY(parser);
    auto* html_parser = static_cast<HTMLParser*>(parser);
    return *html_parser;
}

// Recovers the DOM node behind an integer handle that crosses the FFI boundary.
// The handle is the node's address; zero is treated as a fatal error here.
static DOM::Node& node_from_html_parser_ffi(size_t node)
{
    VERIFY(node != 0);
    auto* dom_node = reinterpret_cast<DOM::Node*>(node);
    return *dom_node;
}

// FFI hook: returns the handle (address) of the document the given parser operates on.
extern "C" size_t ladybird_html_parser_document_node(void* parser)
{
    auto& document = parser_from_html_parser_ffi(parser).document();
    return reinterpret_cast<size_t>(&document);
}

// FFI hook: returns the handle of the document's root element, but only if that element is an
// HTMLHtmlElement. Returns 0 when there is no document element or it is of another type.
extern "C" size_t ladybird_html_parser_document_html_element(void* parser)
{
    auto* root = parser_from_html_parser_ffi(parser).document().document_element();
    if (root && is<HTMLHtmlElement>(*root))
        return reinterpret_cast<size_t>(root);
    return 0;
}

// FFI hook: applies the quirks mode chosen by the Rust parser to the parser's document.
// The request is ignored when the document reports that the parser cannot change the mode.
extern "C" void ladybird_html_parser_set_document_quirks_mode(void* parser, RustFfiHtmlQuirksMode mode)
{
    auto& document = parser_from_html_parser_ffi(parser).document();
    if (document.parser_cannot_change_the_mode())
        return;

    document.set_quirks_mode(quirks_mode_from_html_parser_ffi(mode));
}

// FFI hook: creates a DocumentType node in the realm of the parser's document and fills in its
// name, public identifier and system identifier from the given (pointer, length) pairs.
// Returns the handle of the new node; the node is not inserted into the tree here.
extern "C" size_t ladybird_html_parser_create_document_type(void* parser, u8 const* name_ptr, size_t name_len, u8 const* public_id_ptr, size_t public_id_len, u8 const* system_id_ptr, size_t system_id_len)
{
    auto& document = parser_from_html_parser_ffi(parser).document();
    auto doctype = document.realm().create<DOM::DocumentType>(document);

    doctype->set_name(string_from_html_parser_ffi(name_ptr, name_len));
    doctype->set_public_id(string_from_html_parser_ffi(public_id_ptr, public_id_len));
    doctype->set_system_id(string_from_html_parser_ffi(system_id_ptr, system_id_len));

    return reinterpret_cast<size_t>(doctype.ptr());
}

// FFI hook: creates a Comment node in the realm of the parser's document, carrying the given
// UTF-8 data converted to UTF-16. Returns the handle of the new node; the node is not inserted
// into the tree here.
extern "C" size_t ladybird_html_parser_create_comment(void* parser, u8 const* data_ptr, size_t data_len)
{
    auto& document = parser_from_html_parser_ffi(parser).document();
    auto comment_data = Utf16String::from_utf8(string_from_html_parser_ffi(data_ptr, data_len));
    auto comment = document.realm().create<DOM::Comment>(document, move(comment_data));
    return reinterpret_cast<size_t>(comment.ptr());
}

// FFI hook: inserts character data under `parent`, either before the node `before` or, when
// `before` is 0, at the end of the parent's children.
//
// - Nothing is inserted when the parent is a Document node.
// - If a Text node already sits immediately before the insertion point, the data is appended to
//   that node instead of creating a new one.
// - Otherwise a fresh Text node is created and inserted at the insertion point.
extern "C" void ladybird_html_parser_insert_text(size_t parent, size_t before, u8 const* data_ptr, size_t data_len)
{
    auto& parent_node = node_from_html_parser_ffi(parent);
    if (parent_node.is_document())
        return;

    auto data = Utf16String::from_utf8(string_from_html_parser_ffi(data_ptr, data_len));

    // The node just ahead of the insertion point: the previous sibling of `before`, or the
    // parent's last child when appending.
    DOM::Node* before_node = before ? &node_from_html_parser_ffi(before) : nullptr;
    auto preceding_node = before_node ? before_node->previous_sibling() : parent_node.last_child();

    if (auto* existing_text = as_if<DOM::Text>(preceding_node)) {
        // The result of append_data() is deliberately discarded.
        (void)existing_text->append_data(data);
        return;
    }

    auto text = parent_node.document().realm().create<DOM::Text>(parent_node.document(), data);
    if (before_node)
        parent_node.insert_before(*text, before_node);
    else
        MUST(parent_node.append_child(*text));
}

// FFI hook: adds an attribute to `element` only if the element does not already have an
// attribute with that local name. Existing attributes are left untouched.
extern "C" void ladybird_html_parser_add_missing_attribute(size_t element, u8 const* local_name_ptr, size_t local_name_len, u8 const* value_ptr, size_t value_len)
{
    auto& target = as<DOM::Element>(node_from_html_parser_ffi(element));
    auto attribute_name = fly_string_from_html_parser_ffi(local_name_ptr, local_name_len);

    if (!target.has_attribute(attribute_name))
        target.append_attribute(attribute_name, string_from_html_parser_ffi(value_ptr, value_len));
}

// FFI hook: removes the given node via DOM::Node::remove().
// NOTE(review): the `true` argument presumably suppresses mutation observers - confirm against
// the DOM::Node::remove() declaration.
extern "C" void ladybird_html_parser_remove_node(size_t node)
{
    auto& node_to_remove = node_from_html_parser_ffi(node);
    node_to_remove.remove(true);
}

// FFI hook: called for an element that was popped off the stack of open elements.
// Only option elements need work here; every other node is ignored.
extern "C" void ladybird_html_parser_handle_element_popped(size_t element)
{
    auto* option_element = as_if<HTML::HTMLOptionElement>(node_from_html_parser_ffi(element));
    if (!option_element)
        return;

    // https://html.spec.whatwg.org/multipage/form-elements.html#the-option-element
    // When an option element is popped off the stack of open elements of an HTML parser or XML parser,
    // the user agent must run maybe clone an option into selectedcontent given the option element.
    // AD-HOC: The Rust tree builder flushes buffered text before invoking this hook, so the option's content is
    // up-to-date before cloning.
    MUST(option_element->maybe_clone_into_selectedcontent());
}

// FFI hook: forwards an SVG script element and its source line number to the parser's
// prepare_svg_script_for_rust_parser(). The node must be an SVG::SVGScriptElement.
extern "C" void ladybird_html_parser_prepare_svg_script(void* parser, size_t element, size_t source_line_number)
{
    auto& html_parser = parser_from_html_parser_ffi(parser);
    auto& svg_script = as<SVG::SVGScriptElement>(node_from_html_parser_ffi(element));
    html_parser.prepare_svg_script_for_rust_parser(svg_script, source_line_number);
}

// FFI hook: forwards an element and the source line number reported by the Rust parser to the
// parser's set_script_source_line_from_rust_parser(). The node must be a DOM::Element.
extern "C" void ladybird_html_parser_set_script_source_line(void* parser, size_t element, size_t source_line_number)
{
    auto& html_parser = parser_from_html_parser_ffi(parser);
    auto& script_element = as<DOM::Element>(node_from_html_parser_ffi(element));
    html_parser.set_script_source_line_from_rust_parser(script_element, source_line_number);
}

// FFI hook: asks the parser to mark an HTML script element as "already started".
// Nodes that are not HTMLScriptElements are silently ignored.
extern "C" void ladybird_html_parser_mark_script_already_started(void* parser, size_t element)
{
    auto* script = as_if<HTMLScriptElement>(node_from_html_parser_ffi(element));
    if (!script)
        return;

    parser_from_html_parser_ffi(parser).mark_script_already_started_from_rust_parser(*script);
}

// FFI hook: returns the handle (address) of the given node's parent as reported by parent().
extern "C" size_t ladybird_html_parser_parent_node(size_t node)
{
    return reinterpret_cast<size_t>(node_from_html_parser_ffi(node).parent());
}

// FFI hook: creates an element on behalf of the Rust tree builder.
//
// Rebuilds a start-tag HTMLToken (local name plus attributes) from the FFI data and hands it to
// HTMLParser::create_element_for_rust_parser() together with the element namespace, the intended
// parent, the duplicate-attribute flag, the optional form element (0 means none) and the
// template-on-stack flag. Returns the handle of the created element.
extern "C" size_t ladybird_html_parser_create_element(void* parser, size_t intended_parent, RustFfiHtmlNamespace namespace_, u8 const* namespace_uri_ptr, size_t namespace_uri_len, u8 const* local_name_ptr, size_t local_name_len, RustFfiHtmlParserAttribute const* attributes, size_t attribute_count, bool had_duplicate_attribute, size_t form_element, bool has_template_element_on_stack)
{
    auto& html_parser = parser_from_html_parser_ffi(parser);
    auto start_tag = HTMLToken::make_start_tag(fly_string_from_html_parser_ffi(local_name_ptr, local_name_len));

    for (size_t index = 0; index < attribute_count; ++index) {
        auto const& ffi_attribute = attributes[index];

        HTMLToken::Attribute converted;
        // A zero-length prefix means the attribute has no prefix at all.
        if (ffi_attribute.prefix_len != 0)
            converted.prefix = fly_string_from_html_parser_ffi(ffi_attribute.prefix_ptr, ffi_attribute.prefix_len);
        converted.local_name = fly_string_from_html_parser_ffi(ffi_attribute.local_name_ptr, ffi_attribute.local_name_len);
        converted.namespace_ = attribute_namespace_from_html_parser_ffi(ffi_attribute.namespace_);
        converted.value = string_from_html_parser_ffi(ffi_attribute.value_ptr, ffi_attribute.value_len);
        start_tag.add_attribute(move(converted));
    }

    auto& parent_node = node_from_html_parser_ffi(intended_parent);

    // The form element handle is optional; when present it must refer to an HTMLFormElement.
    GC::Ptr<HTMLFormElement> owner_form;
    if (form_element != 0)
        owner_form = as<HTMLFormElement>(node_from_html_parser_ffi(form_element));

    auto created = html_parser.create_element_for_rust_parser(
        start_tag,
        namespace_from_html_parser_ffi(namespace_, namespace_uri_ptr, namespace_uri_len),
        parent_node,
        had_duplicate_attribute,
        owner_form,
        has_template_element_on_stack);

    return reinterpret_cast<size_t>(created.ptr());
}

// FFI hook: appends `child` as the last child of `parent`. A failing append is fatal (MUST).
extern "C" void ladybird_html_parser_append_child(size_t parent, size_t child)
{
    auto& parent_node = node_from_html_parser_ffi(parent);
    auto& child_node = node_from_html_parser_ffi(child);
    MUST(parent_node.append_child(child_node));
}

// FFI hook: inserts `child` under `parent`, before `before` or at the end when `before` is 0.
//
// When `queue_custom_element_reactions` is set and the child is an element, the insertion is
// bracketed by a fresh entry on the custom element reactions stack: an empty element queue is
// pushed before inserting, then popped and its reactions invoked afterwards.
extern "C" void ladybird_html_parser_insert_node(size_t parent, size_t before, size_t child, bool queue_custom_element_reactions)
{
    auto& parent_node = node_from_html_parser_ffi(parent);
    auto& child_node = node_from_html_parser_ffi(child);

    auto* child_element = as_if<DOM::Element>(child_node);
    bool const wrap_in_reaction_queue = queue_custom_element_reactions && child_element != nullptr;

    if (wrap_in_reaction_queue)
        relevant_similar_origin_window_agent(*child_element).custom_element_reactions_stack.element_queue_stack.append({});

    if (before) {
        auto& before_node = node_from_html_parser_ffi(before);
        parent_node.insert_before(child_node, &before_node, false);
    } else {
        MUST(parent_node.append_child(child_node));
    }

    if (wrap_in_reaction_queue) {
        auto element_queue = relevant_similar_origin_window_agent(*child_element).custom_element_reactions_stack.element_queue_stack.take_last();
        Bindings::invoke_custom_element_reactions(element_queue);
    }
}

// FFI hook: moves every child of `from` to the end of `to`, preserving their order.
// The child list is snapshotted first so that removing nodes does not disturb the iteration.
extern "C" void ladybird_html_parser_move_all_children(size_t from, size_t to)
{
    auto& source = node_from_html_parser_ffi(from);
    auto& destination = node_from_html_parser_ffi(to);

    auto children = source.children_as_vector();
    for (auto& child : children) {
        auto detached = source.remove_child(*child).release_value();
        MUST(destination.append_child(detached));
    }
}

// FFI hook: returns the handle of the value returned by content() on the given template element.
// The node must be an HTMLTemplateElement.
extern "C" size_t ladybird_html_parser_template_content(size_t element)
{
    auto& node = node_from_html_parser_ffi(element);
    auto content = as<HTMLTemplateElement>(node).content();
    return reinterpret_cast<size_t>(content.ptr());
}

// FFI hook: attaches a declarative shadow root to `host`.
//
// Returns 0 when the host already is a shadow host or when attaching fails; otherwise returns the
// handle of the new shadow root, which has been flagged as declarative and as available to
// element internals. Unless `keep_custom_element_registry_null` is set, the host document's
// custom element registry is passed along; when it is set, a null registry is passed and the
// shadow root is told to keep it null.
extern "C" size_t ladybird_html_parser_attach_declarative_shadow_root(size_t host, RustFfiHtmlShadowRootMode mode, RustFfiHtmlSlotAssignmentMode slot_assignment, bool clonable, bool serializable, bool delegates_focus, bool keep_custom_element_registry_null)
{
    auto& host_element = as<DOM::Element>(node_from_html_parser_ffi(host));
    if (host_element.is_shadow_host())
        return 0;

    GC::Ptr<CustomElementRegistry> registry;
    if (!keep_custom_element_registry_null)
        registry = host_element.document().custom_element_registry();

    // Anything other than Open is treated as Closed, and anything other than Manual as Named.
    auto const shadow_root_mode = mode == RustFfiHtmlShadowRootMode::Open
        ? Bindings::ShadowRootMode::Open
        : Bindings::ShadowRootMode::Closed;
    auto const slot_assignment_mode = slot_assignment == RustFfiHtmlSlotAssignmentMode::Manual
        ? Bindings::SlotAssignmentMode::Manual
        : Bindings::SlotAssignmentMode::Named;

    auto attach_result = host_element.attach_a_shadow_root(shadow_root_mode, clonable, serializable, delegates_focus, slot_assignment_mode, registry);
    if (attach_result.is_error())
        return 0;

    auto shadow_root = host_element.shadow_root();
    VERIFY(shadow_root);

    shadow_root->set_declarative(true);
    shadow_root->set_available_to_element_internals(true);
    if (keep_custom_element_registry_null)
        shadow_root->set_keep_custom_element_registry_null(true);

    return reinterpret_cast<size_t>(shadow_root.ptr());
}

// FFI hook: sets the template contents of the given template element to the given fragment.
// `element` must be an HTMLTemplateElement and `content` a DOM::DocumentFragment.
extern "C" void ladybird_html_parser_set_template_content(size_t element, size_t content)
{
    auto& template_element = as<HTMLTemplateElement>(node_from_html_parser_ffi(element));
    auto& fragment = as<DOM::DocumentFragment>(node_from_html_parser_ffi(content));
    template_element.set_template_contents(fragment);
}

// FFI hook: reports whether the document that owns the given node allows declarative shadow roots.
extern "C" bool ladybird_html_parser_allows_declarative_shadow_roots(size_t node)
{
    auto& dom_node = node_from_html_parser_ffi(node);
    return dom_node.document().allow_declarative_shadow_roots();
}

}
-												LibWeb: Start building the tree building part of the new HTML parser

This patch adds a new HTMLDocumentParser class. It keeps a tokenizer
object internally and feeds itself with one token at a time from it.

The names and idioms in this class are expressed as closely to the
actual HTML parsing spec as possible, to make development as easy
and bug free as possible. :^)

This is going to become pretty large, but it's pretty cool!

											
										
										
											2020-05-24 00:14:23 +02:00
+								/*
-												LibWeb: Block rendering until linked stylesheets are loaded

This commit implements the main "render blocking" behavior for link
elements, drastically reducing the amount of FOUC (flash of unstyled
content) we subject our users to.

The document will now block rendering until linked style sheets
referenced by parser-created link elements have loaded (or failed).

Note that we don't yet extend the blocking period until "critical
subresources" such as imported style sheets have been downloaded
as well.

											
										
										
											2025-02-27 15:30:26 +01:00
+								 * Copyright (c) 2020-2025, Andreas Kling <andreas@ladybird.org>
-												LibWeb: Implement HTML fragment serialisation and use it in innerHTML

The previous implementation was about a half implementation and was
tied to Element::innerHTML. This separates it and puts it into
HTMLDocumentParser, as this is in the parsing section of the spec.

This provides a near finished HTML fragment serialisation algorithm,
bar namespaces in attributes and the `is` value.

											
										
										
											2021-09-13 22:42:15 +01:00
+								 * Copyright (c) 2021, Luke Wilde <lukew@serenityos.org>
-												LibWeb: Only wait for document to be ready for scripts if executing one

HTML fragments are parsed with a temporary HTML document that never has
its flag set to say that it is ready to have scripts executed. For these
fragments, in the HTMLParser, these scripts are prepared, but
execute_script is never called on them.

This results in the HTMLParser waiting forever on the document to be
ready to have scripts executed.

To fix this, only wait for the document to be ready if we are definitely
going to execute a script.

This fixes a hang processing the HTML in the attached test, as seen on:
https://github.com/SerenityOS/serenity

Fixes: #22735

											
										
										
											2024-01-14 15:59:38 +13:00
+								 * Copyright (c) 2023-2024, Shannon Booth <shannon@serenityos.org>
-												LibWeb: Keep the tokens in `ListOfActiveFormattingElements`

											
										
										
											2025-10-20 04:06:01 +02:00
+								 * Copyright (c) 2025, Lorenz Ackermann <me@lorenzackermann.xyz>
-												LibWeb: Start building the tree building part of the new HTML parser

This patch adds a new HTMLDocumentParser class. It keeps a tokenizer
object internally and feeds itself with one token at a time from it.

The names and idioms in this class are expressed as closely to the
actual HTML parsing spec as possible, to make development as easy
and bug free as possible. :^)

This is going to become pretty large, but it's pretty cool!

											
										
										
											2020-05-24 00:14:23 +02:00
+								 *
-												Everything: Move to SPDX license identifiers in all files.

SPDX License Identifiers are a more compact / standardized
way of representing file license information.

See: https://spdx.dev/resources/use/#identifiers

This was done with the `ambr` search and replace tool.

 ambr --no-parent-ignore --key-from-file --rep-from-file key.txt rep.txt *

											
										
										
											2021-04-22 01:24:48 -07:00
+								 * SPDX-License-Identifier: BSD-2-Clause
-												LibWeb: Start building the tree building part of the new HTML parser

This patch adds a new HTMLDocumentParser class. It keeps a tokenizer
object internally and feeds itself with one token at a time from it.

The names and idioms in this class are expressed as closely to the
actual HTML parsing spec as possible, to make development as easy
and bug free as possible. :^)

This is going to become pretty large, but it's pretty cool!

											
										
										
											2020-05-24 00:14:23 +02:00
+								 */
-												Everywhere: Replace a bundle of dbg with dbgln.

These changes are arbitrarily divided into multiple commits to make it
easier to find potentially introduced bugs with git bisect.

											
										
										
											2021-01-17 16:57:17 +01:00
+								#include <AK/Debug.h>
-												LibWeb: Use standardized encoding names, add encoding attribute to document

											
										
										
											2020-11-13 11:16:28 +00:00
+								#include <LibTextCodec/Decoder.h>
-												LibWeb: Parse declarative shadow DOM template elements

We now honor the shadowrootmode attribute on template elements while
parsing, and instantiate a shadow tree as required by the spec.

											
										
										
											2024-06-25 09:43:50 +02:00
+								#include <LibWeb/Bindings/ExceptionOrUtils.h>
-												LibWeb+LibJS: Make the EventTarget hierarchy (incl. DOM) GC-allocated

This is a monster patch that turns all EventTargets into GC-allocated
PlatformObjects. Their C++ wrapper classes are removed, and the LibJS
garbage collector is now responsible for their lifetimes.

There's a fair amount of hacks and band-aids in this patch, and we'll
have a lot of cleanup to do after this.

											
										
										
											2022-08-28 13:42:07 +02:00
+								#include <LibWeb/Bindings/MainThreadVM.h>
-												LibWeb/HTML: Support `align` attributes on table sections and rows

thead, tbody, tfoot, tr, td, and th all have an `align` presentational
attribute with identical definitions. We previously only supported it
for td and th, and also allowed arbitrary text-align values instead of
the 4 dictated by the spec.

											
										
										
											2026-04-30 11:15:16 +01:00
+								#include <LibWeb/CSS/Parser/Parser.h>
-												LibWeb: Split LengthStyleValue out of StyleValue.{h,cpp}

											
										
										
											2023-03-24 17:04:04 +00:00
+								#include <LibWeb/CSS/StyleValues/LengthStyleValue.h>
-												LibWeb: Split PercentageStyleValue out of StyleValue.{h,cpp}

											
										
										
											2023-03-24 17:28:43 +00:00
+								#include <LibWeb/CSS/StyleValues/PercentageStyleValue.h>
-												LibWeb: Properly append attributes to element when creating an Element

The main behavioural difference here is that the full qualified name is
appended to the element, rather than just the local name and value.

											
										
										
											2023-10-04 17:45:48 +13:00
+								#include <LibWeb/DOM/Attr.h>
-												LibWeb: Support comments in the "in head" insertion mode

											
										
										
											2020-05-24 20:29:01 +02:00
+								#include <LibWeb/DOM/Comment.h>
-												LibWeb: Start building the tree building part of the new HTML parser

This patch adds a new HTMLDocumentParser class. It keeps a tokenizer
object internally and feeds itself with one token at a time from it.

The names and idioms in this class are expressed as closely to the
actual HTML parsing spec as possible, to make development as easy
and bug free as possible. :^)

This is going to become pretty large, but it's pretty cool!

											
										
										
											2020-05-24 00:14:23 +02:00
+								#include <LibWeb/DOM/Document.h>
 								#include <LibWeb/DOM/DocumentType.h>
-												LibWeb: Respect scroll position set by script during page load

When setting scroll position during page load we need to consider
whether we actually have a fragment to scroll to. A script may already
have run at that point and may already have set a scroll position.

If there is an actual fragment to scroll to, it is fine to scroll to
that fragment, since it should take precedence. If we don't have a
fragment however, we should not unnecessarily overwrite the scroll
position set by the script back to (0, 0).

Since this problem is caused by a spec bug, I have tested the behavior
in the three major browsers engines. Unfortunately they do not agree
fully with each other. If there is no fragment at all (e.g. `foo.html`),
all browsers will respect the scroll position set by the script. If
there is a fragment (e.g. `foo.html#bar`), all browsers will set the
scroll position to the fragment element and ignore the one set by
script. However, when the fragment is empty (e.g. `foo.html#`), then
Blink and WebKit will set scroll position to the fragment, while Gecko
will set scroll position from script. Since all of this is ad-hoc
behavior anyway, I simply implemented the Blink/WebKit behavior because
of the majority vote for now.

This fixes a regression introduced in 51102254b5.

											
										
										
											2025-03-07 22:04:36 +01:00
+								#include <LibWeb/DOM/Element.h>
-												LibWeb: Start building the tree building part of the new HTML parser

This patch adds a new HTMLDocumentParser class. It keeps a tokenizer
object internally and feeds itself with one token at a time from it.

The names and idioms in this class are expressed as closely to the
actual HTML parsing spec as possible, to make development as easy
and bug free as possible. :^)

This is going to become pretty large, but it's pretty cool!

											
										
										
											2020-05-24 00:14:23 +02:00
+								#include <LibWeb/DOM/ElementFactory.h>
-												LibWeb: Fire a DOMContentLoaded event when the new parser is finished

With this change, we can finally load and render welcome.html :^)

											
										
										
											2020-05-27 23:32:50 +02:00
+								#include <LibWeb/DOM/Event.h>
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
+								#include <LibWeb/DOM/NamedNodeMap.h>
-												LibWeb: Implement HTML fragment serialisation and use it in innerHTML

The previous implementation was about a half implementation and was
tied to Element::innerHTML. This separates it and puts it into
HTMLDocumentParser, as this is in the parsing section of the spec.

This provides a near finished HTML fragment serialisation algorithm,
bar namespaces in attributes and the `is` value.

											
										
										
											2021-09-13 22:42:15 +01:00
+								#include <LibWeb/DOM/ProcessingInstruction.h>
-												LibWeb: Properly append attributes to element when creating an Element

The main behavioural difference here is that the full qualified name is
appended to the element, rather than just the local name and value.

											
										
										
											2023-10-04 17:45:48 +13:00
+								#include <LibWeb/DOM/QualifiedName.h>
-												LibWeb: Parse declarative shadow DOM template elements

We now honor the shadowrootmode attribute on template elements while
parsing, and instantiate a shadow tree as required by the spec.

											
										
										
											2024-06-25 09:43:50 +02:00
+								#include <LibWeb/DOM/ShadowRoot.h>
-												LibWeb: Move HTML classes into the Web::HTML namespace

											
										
										
											2020-07-28 18:20:36 +02:00
+								#include <LibWeb/DOM/Text.h>
-												LibWeb: Introduce CustomElementRegistry and creating custom elements

The main missing feature here is form associated custom elements.

											
										
										
											2023-03-29 23:46:18 +01:00
+								#include <LibWeb/HTML/CustomElements/CustomElementDefinition.h>
-												LibWeb: Implement scoped custom element registries

											
										
										
											2026-02-27 17:05:47 +00:00
+								#include <LibWeb/HTML/CustomElements/CustomElementRegistry.h>
-												LibWeb: Spin the event loop in HTML parser until scripts can run

Call HTML::EventLoop::spin_until() from the HTML parser when deciding
whether we can run a script yet.

Note that spin_until() actually doesn't do any work yet.

											
										
										
											2021-09-09 02:15:44 +02:00
+								#include <LibWeb/HTML/EventLoop/EventLoop.h>
-												LibWeb: Add HTML::EventNames and UIEvents::EventNames

											
										
										
											2020-11-21 19:15:57 +00:00
+								#include <LibWeb/HTML/EventNames.h>
-												LibWeb: Move HTML object model stuff into LibWeb/HTML/

Take a hint from SVG and move all the HTML classes into HTML instead of
mixing them with the DOM classes.

											
										
										
											2020-07-26 15:08:16 +02:00
+								#include <LibWeb/HTML/HTMLFormElement.h>
-												LibWeb: Implement HTML frameset parsing in the "in body" state

Covered by many WPT parsing tests, which will be imported.

											
										
										
											2024-11-02 23:19:17 +01:00
+								#include <LibWeb/HTML/HTMLHtmlElement.h>
-												LibWeb: Block rendering until linked stylesheets are loaded

This commit implements the main "render blocking" behavior for link
elements, drastically reducing the amount of FOUC (flash of unstyled
content) we subject our users to.

The document will now block rendering until linked style sheets
referenced by parser-created link elements have loaded (or failed).

Note that we don't yet extend the blocking period until "critical
subresources" such as imported style sheets have been downloaded
as well.

											
										
										
											2025-02-27 15:30:26 +01:00
+								#include <LibWeb/HTML/HTMLLinkElement.h>
-												LibWeb: Add HTMLSelectedContentElement for customizable select

Introduce the HTMLSelectedContentElement and integrate it into
<select>, <option> and HTMLParser.

See whatwg/html#10548.

There are two bugs with WPT tests which causes the third subtest
in selectedcontent.html and selectedcontent-mutations.html fail.
See whatwg/html#11882, web-platform-tests/wpt#55849.

											
										
										
											2025-12-07 16:37:54 -08:00
+								#include <LibWeb/HTML/HTMLOptionElement.h>
-												LibWeb: Move HTML object model stuff into LibWeb/HTML/

Take a hint from SVG and move all the HTML classes into HTML instead of
mixing them with the DOM classes.

											
										
										
											2020-07-26 15:08:16 +02:00
+								#include <LibWeb/HTML/HTMLScriptElement.h>
-												LibWeb: Implement <template> parsing

Note that there is currently no way to display them as we can't
currently clone nodes.

Adds special case for templates for dumping to console.
Doesn't add it to the DOM inspector as I'm not sure how to do it.

											
										
										
											2020-08-19 22:30:33 +01:00
+								#include <LibWeb/HTML/HTMLTemplateElement.h>
-												LibWeb: Implement encoding sniffing algorithm

This patch implements the HTML specification's "encoding sniffing
algorithm", which is used when no encoding can be obtained from the
Content-Type header (either because it doesn't contain a charset=...)
value or the file has not been opened via HTTP (as with local files).

It also modifies the creator of the HTMLDocumentParser to use the new
HTMLDocumentParser::create_with_uncertain_encoding static method, which
runs the encoding sniffing algorithm before instantiating the parser.

This now allows us to load local HTML pages (or remote pages without a
charset specified in the 'Content-Type' header) with a non-UTF-8
encoding such as 'windows-1252'. This would previously crash the
browser. :^)

											
										
										
											2021-05-12 10:47:12 +02:00
+								#include <LibWeb/HTML/Parser/HTMLEncodingDetection.h>
-												LibWeb: Rename HTMLDocumentParser => HTMLParser

											
										
										
											2021-09-25 23:15:48 +02:00
+								#include <LibWeb/HTML/Parser/HTMLParser.h>
-												LibWeb: Move the HTML parser into HTML/Parser/

											
										
										
											2020-07-28 19:18:23 +02:00
+								#include <LibWeb/HTML/Parser/HTMLToken.h>
-												LibWeb: Implement the speculative HTML parser

When the HTML parser blocks on a synchronous external script, run a
separate tokenizer over the unparsed input and issue speculative fetches
for the resources it finds (script src, link rel=stylesheet|preload, img
src), with <base href> tracking and template/foreign-content skipping.

Also fills in the previously-stubbed "consume a preloaded resource"
algorithm and the document's "map of preloaded resources", so that
<link rel="preload"> followed by a matching consumer deduplicates to
a single fetch.

											
										
										
											2026-04-26 03:21:39 +02:00
+								#include <LibWeb/HTML/Parser/SpeculativeHTMLParser.h>
-												LibWeb: Parse declarative shadow DOM template elements

We now honor the shadowrootmode attribute on template elements while
parsing, and instantiate a shadow tree as required by the spec.

											
										
										
											2024-06-25 09:43:50 +02:00
+								#include <LibWeb/HTML/Scripting/ExceptionReporter.h>
-												LibWeb: Split out SimilarOriginWindowAgent from HTML::Agent

To allow for adding the concept of a WorkerAgent to be reused
between shared and dedicated workers. An event loop is the
commonality between the different agent types, though, there
are some differences between those event loops which we customize
on the construction of the HTML::EventLoop.

											
										
										
											2025-04-24 15:04:13 +12:00
+								#include <LibWeb/HTML/Scripting/SimilarOriginWindowAgent.h>
-												LibWeb: Move Window from DOM directory & namespace to HTML

The Window object is part of the HTML spec. :^)
https://html.spec.whatwg.org/multipage/window-object.html

											
										
										
											2022-03-07 23:08:26 +01:00
+								#include <LibWeb/HTML/Window.h>
-												LibWeb: Add Rust HTML parser host plumbing

Add the C++ and Rust scaffolding that lets the tree builder live in
Rust while the DOM remains owned by LibWeb. Keep the exported surface
small: Rust stores parser state, and C++ provides node creation,
insertion, script, template, and GC hooks.

Route dump-html-tree through the selectable parser backend so the new
implementation can be exercised beside the existing parser while it is
being brought up.

											
										
										
											2026-05-15 20:29:23 +02:00
+								#include <LibWeb/HTMLTokenizerRustFFI.h>
-												LibWeb: Rename HighResolutionTime/{CoarsenTime => TimeOrigin}.cpp/h

This is being used for more than just time coarsening now, so let's use
the spec's section title for the name.

											
										
										
											2022-10-04 21:30:29 +01:00
+								#include <LibWeb/HighResolutionTime/TimeOrigin.h>
-												LibWeb: Replace incorrect uses of AK::is_ascii_space()

											
										
										
											2022-10-01 18:14:32 +01:00
+								#include <LibWeb/Infra/CharacterTypes.h>
-												LibWeb: Add Web::HTML::parse_legacy_color_value

This function follows the "rules for parsing a legacy color value"
which is used in some legacy attributes, such as 'bgcolor' in the body
element.

											
										
										
											2023-05-28 15:04:40 +12:00
+								#include <LibWeb/Infra/Strings.h>
-												LibWeb: Add namespace to Element

											
										
										
											2020-10-10 02:48:05 +01:00
+								#include <LibWeb/Namespace.h>
-												LibWeb: Replace spin_until in HTMLParser::the_end() with state machine

HTMLParser::the_end() had three spin_until calls that blocked the event
loop: step 5 (deferred scripts), step 7 (ASAP scripts), and step 8
(load event delay). This replaces them with an HTMLParserEndState state
machine that progresses asynchronously via callbacks.

The state machine has three phases matching the three spin_until calls:
- WaitingForDeferredScripts: loops executing ready deferred scripts
- WaitingForASAPScripts: waits for ASAP script lists to empty
- WaitingForLoadEventDelay: waits for nothing to delay the load event

Notification triggers re-evaluate the state machine when conditions
change: HTMLScriptElement::mark_as_ready, stylesheet unblocking in
StyleElementBase/HTMLLinkElement, did_stop_being_active_document, and
DocumentLoadEventDelayer decrements. NavigableContainer state changes
(session history readiness, content navigable cleared, lazy load flag)
also trigger re-evaluation of the load event delay check.

Key design decisions and why:

1. Microtask checkpoint in schedule_progress_check(): The old spin_until
   called perform_a_microtask_checkpoint() before checking conditions.
   This is critical because HTMLImageElement::update_the_image_data step
   8 queues a microtask that creates the DocumentLoadEventDelayer.
   Without the checkpoint, check_progress() would see zero delayers and
   complete before images start delaying the load event.

2. deferred_invoke in schedule_progress_check():
   I tried Core::Timer (0ms), queue_global_task, and synchronous calls.
   Timers caused non-deterministic ordering with the HTML event loop's
   task processing timer, leading to image layout tests failing (wrong
   subtest pass/fail patterns). Synchronous calls fired too early during
   image load processing before dimensions were set, causing 0-height
   images in layout tests. queue_global_task had task ordering issues
   with the session history traversal queue. deferred_invoke runs after
   the current callback returns but within the same event loop pump,
   giving the right balance.

3. Navigation load event guard (m_navigation_load_event_guard): During
   cross-document navigation, finalize_a_cross_document_navigation step
   2 calls set_delaying_load_events(false) before the session history
   traversal activates the new document. This creates a transient state
   where the parent's load event delay check sees the about:blank (which
   has ready_for_post_load_tasks=true) as the active document and
   completes prematurely.

											
										
										
											2026-03-28 09:39:51 +01:00
+								#include <LibWeb/Platform/EventLoopPlugin.h>
-												LibWeb: Add support for inline SVG element scripts

											
										
										
											2023-09-26 01:12:21 +13:00
+								#include <LibWeb/SVG/SVGScriptElement.h>
-												LibWeb: Start building the tree building part of the new HTML parser

This patch adds a new HTMLDocumentParser class. It keeps a tokenizer
object internally and feeds itself with one token at a time from it.

The names and idioms in this class are expressed as closely to the
actual HTML parsing spec as possible, to make development as easy
and bug free as possible. :^)

This is going to become pretty large, but it's pretty cool!

											
										
										
											2020-05-24 00:14:23 +02:00
-												LibWeb: Move HTML classes into the Web::HTML namespace

											
										
										
											2020-07-28 18:20:36 +02:00
+								namespace Web::HTML {
-												LibGC+Everywhere: Factor out a LibGC from LibJS

Resulting in a massive rename across almost everywhere! Alongside the
namespace change, we now have the following names:

 * JS::NonnullGCPtr -> GC::Ref
 * JS::GCPtr -> GC::Ptr
 * JS::HeapFunction -> GC::Function
 * JS::CellImpl -> GC::Cell
 * JS::Handle -> GC::Root

											
										
										
											2024-11-15 04:01:23 +13:00
+								GC_DEFINE_ALLOCATOR(HTMLParser);
-												LibWeb: Replace spin_until in HTMLParser::the_end() with state machine

HTMLParser::the_end() had three spin_until calls that blocked the event
loop: step 5 (deferred scripts), step 7 (ASAP scripts), and step 8
(load event delay). This replaces them with an HTMLParserEndState state
machine that progresses asynchronously via callbacks.

The state machine has three phases matching the three spin_until calls:
- WaitingForDeferredScripts: loops executing ready deferred scripts
- WaitingForASAPScripts: waits for ASAP script lists to empty
- WaitingForLoadEventDelay: waits for nothing to delay the load event

Notification triggers re-evaluate the state machine when conditions
change: HTMLScriptElement::mark_as_ready, stylesheet unblocking in
StyleElementBase/HTMLLinkElement, did_stop_being_active_document, and
DocumentLoadEventDelayer decrements. NavigableContainer state changes
(session history readiness, content navigable cleared, lazy load flag)
also trigger re-evaluation of the load event delay check.

Key design decisions and why:

1. Microtask checkpoint in schedule_progress_check(): The old spin_until
   called perform_a_microtask_checkpoint() before checking conditions.
   This is critical because HTMLImageElement::update_the_image_data step
   8 queues a microtask that creates the DocumentLoadEventDelayer.
   Without the checkpoint, check_progress() would see zero delayers and
   complete before images start delaying the load event.

2. deferred_invoke in schedule_progress_check():
   I tried Core::Timer (0ms), queue_global_task, and synchronous calls.
   Timers caused non-deterministic ordering with the HTML event loop's
   task processing timer, leading to image layout tests failing (wrong
   subtest pass/fail patterns). Synchronous calls fired too early during
   image load processing before dimensions were set, causing 0-height
   images in layout tests. queue_global_task had task ordering issues
   with the session history traversal queue. deferred_invoke runs after
   the current callback returns but within the same event loop pump,
   giving the right balance.

3. Navigation load event guard (m_navigation_load_event_guard): During
   cross-document navigation, finalize_a_cross_document_navigation step
   2 calls set_delaying_load_events(false) before the session history
   traversal activates the new document. This creates a transient state
   where the parent's load event delay check sees the about:blank (which
   has ready_for_post_load_tasks=true) as the active document and
   completes prematurely.

											
										
										
											2026-03-28 09:39:51 +01:00
+								GC_DEFINE_ALLOCATOR(HTMLParserEndState);
-												LibWeb: Put most LibWeb GC objects in type-specific heap blocks

With this change, we now have ~1200 CellAllocators across both LibJS and
LibWeb in a normal WebContent instance.

This gives us a minimum heap size of 4.7 MiB in the scenario where we
only have one cell allocated per type. Of course, in practice there will
be many more of each type, so the effective overhead is quite a bit
smaller than that in practice.

I left a few types unconverted to this mechanism because I got tired of
doing this. :^)

											
										
										
											2023-11-19 19:47:52 +01:00
-												LibWeb: Wire Rust parser scripts and fragments

Preserve Rust parser state across tokenizer runs and stop cleanly when
a parser-blocking script has to execute. Thread the pending script back
through the existing C++ parser entry point so document.write(), input
insertion points, and script bookkeeping continue to use the normal
LibWeb machinery.

Add the fragment parser setup needed by innerHTML and contextual
fragment parsing, including context elements, form ownership, tokenizer
state selection, text coalescing, and foreign-content integration.

											
										
										
											2026-05-15 20:57:22 +02:00
+								static DOM::Node& node_from_html_parser_ffi(size_t);
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
+								static HTMLParser& parser_from_html_parser_ffi(void*);
-												LibWeb: Wire Rust parser scripts and fragments

Preserve Rust parser state across tokenizer runs and stop cleanly when
a parser-blocking script has to execute. Thread the pending script back
through the existing C++ parser entry point so document.write(), input
insertion points, and script bookkeeping continue to use the normal
LibWeb machinery.

Add the fragment parser setup needed by innerHTML and contextual
fragment parsing, including context elements, form ownership, tokenizer
state selection, text coalescing, and foreign-content integration.

											
										
										
											2026-05-15 20:57:22 +02:00
+								static RustFfiHtmlNamespace namespace_to_html_parser_ffi(Optional<FlyString> const&);
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
+								static RustFfiHtmlAttributeNamespace attribute_namespace_to_html_parser_ffi(Optional<FlyString> const&);
 								static RustFfiHtmlQuirksMode quirks_mode_to_html_parser_ffi(DOM::QuirksMode);
 								extern "C" void ladybird_html_parser_log_parse_error(void*, u8 const*, size_t);
 								extern "C" void ladybird_html_parser_stop_parsing(void*);
 								extern "C" bool ladybird_html_parser_parse_errors_enabled();
 								extern "C" void ladybird_html_parser_visit_node(void*, size_t);
 								extern "C" size_t ladybird_html_parser_document_node(void*);
 								extern "C" size_t ladybird_html_parser_document_html_element(void*);
 								extern "C" void ladybird_html_parser_set_document_quirks_mode(void*, RustFfiHtmlQuirksMode);
 								extern "C" size_t ladybird_html_parser_create_document_type(void*, u8 const*, size_t, u8 const*, size_t, u8 const*, size_t);
 								extern "C" size_t ladybird_html_parser_create_comment(void*, u8 const*, size_t);
 								extern "C" void ladybird_html_parser_insert_text(size_t, size_t, u8 const*, size_t);
 								extern "C" void ladybird_html_parser_add_missing_attribute(size_t, u8 const*, size_t, u8 const*, size_t);
 								extern "C" void ladybird_html_parser_remove_node(size_t);
 								extern "C" void ladybird_html_parser_handle_element_popped(size_t);
 								extern "C" void ladybird_html_parser_prepare_svg_script(void*, size_t, size_t);
 								extern "C" void ladybird_html_parser_set_script_source_line(void*, size_t, size_t);
 								extern "C" void ladybird_html_parser_mark_script_already_started(void*, size_t);
 								extern "C" size_t ladybird_html_parser_parent_node(size_t);
 								extern "C" size_t ladybird_html_parser_create_element(void*, size_t, RustFfiHtmlNamespace, u8 const*, size_t, u8 const*, size_t, RustFfiHtmlParserAttribute const*, size_t, bool, size_t, bool);
 								extern "C" void ladybird_html_parser_append_child(size_t, size_t);
 								extern "C" void ladybird_html_parser_insert_node(size_t, size_t, size_t, bool);
 								extern "C" void ladybird_html_parser_move_all_children(size_t, size_t);
 								extern "C" size_t ladybird_html_parser_template_content(size_t);
 								extern "C" size_t ladybird_html_parser_attach_declarative_shadow_root(size_t, RustFfiHtmlShadowRootMode, RustFfiHtmlSlotAssignmentMode, bool, bool, bool, bool);
 								extern "C" void ladybird_html_parser_set_template_content(size_t, size_t);
 								extern "C" bool ladybird_html_parser_allows_declarative_shadow_roots(size_t);
-												LibWeb: Wire Rust parser scripts and fragments

Preserve Rust parser state across tokenizer runs and stop cleanly when
a parser-blocking script has to execute. Thread the pending script back
through the existing C++ parser entry point so document.write(), input
insertion points, and script bookkeeping continue to use the normal
LibWeb machinery.

Add the fragment parser setup needed by innerHTML and contextual
fragment parsing, including context elements, form ownership, tokenizer
state selection, text coalescing, and foreign-content integration.

											
										
										
											2026-05-15 20:57:22 +02:00
-												LibWeb: Make the Rust HTML parser unconditional

Remove the runtime selector between the old C++ tree builder and the new
Rust implementation. Always construct HTML documents and fragments with
the Rust parser now that it matches the existing tests.

Simplify dump-html-tree by dropping the backend option that only made
sense while both parser implementations were available.

											
										
										
											2026-05-16 13:35:37 +02:00
+								HTMLParser::HTMLParser(DOM::Document& document, ParserScriptingMode scripting_mode, StringView input, StringView encoding)
-												LibWeb: Plumb content encoding into the new HTML parser

We still don't handle non-ASCII input correctly, but at least now we'll
convert e.g. ISO-8859-1 to UTF-8 before starting to tokenize.
This patch also makes "view source" work with the new parser. :^)

											
										
										
											2020-05-28 12:35:19 +02:00
+								    : m_tokenizer(input, encoding)
-												LibWeb: Set fragment scripting mode from the context document

This corresponds with the editorial change to the HTML standard
introducing the parsing mode enum of:

https://github.com/whatwg/html/commit/01c45cede

And a follow up normative change of:

https://github.com/whatwg/html/commit/508706c80

Making fragment parsing derive its scripting mode from the context
document.

											
										
										
											2026-04-12 13:31:30 +02:00
+								    , m_scripting_mode(scripting_mode)
-												LibWeb: Stop creating transient throwaway JS::Handles in HTML parser

These were being immediately stored in JS::GCPtrs (and dutifully visited
by HTMLParser), so creating temporary handles for them was a complete
waste of time.

											
										
										
											2024-07-20 14:38:32 +02:00
+								    , m_document(document)
-												test-web: Add ability to change page mid-test

This allows you to not have to write a separate test file
for the same thing but in a different situation.

This doesn't handle when you change the page with location.href
however.

Changes the name of the page load handlers to prevent confusion
with this.
											
										
										
											2020-07-24 21:24:11 +01:00
+								{
-												LibWeb: Make the Rust HTML parser unconditional

Remove the runtime selector between the old C++ tree builder and the new
Rust implementation. Always construct HTML documents and fragments with
the Rust parser now that it matches the existing tests.

Simplify dump-html-tree by dropping the backend option that only made
sense while both parser implementations were available.

											
										
										
											2026-05-16 13:35:37 +02:00
+								    m_rust_parser = rust_html_parser_create();
-												LibWeb: Make document.write() work while document is parsing

This necessitated making HTMLParser ref-counted, and having it register
itself with Document when created. That makes it possible for scripts to
add new input at the current parser insertion point.

There is now a reference cycle between Document and HTMLParser. This
cycle is explicitly broken by calling Document::detach_parser() at the
end of HTMLParser::run().

This is a huge progression on ACID3, from 31% to 49%! :^)

											
										
										
											2022-02-21 21:54:21 +01:00
+								    m_document->set_parser({}, *this);
-												LibTextCodec: Use Optional<String> for get_standardized_encoding

This patch changes get_standardized_encoding to use an Optional<String>
return type instead of just returning the null string when unable to
match the provided encoding to one of the canonical encoding names.

This is part of an effort to move away from using null strings towards
explicitly using Optional<String> to indicate that the String may not
have a value.

											
										
										
											2021-05-11 15:52:25 +02:00
+								    auto standardized_encoding = TextCodec::get_standardized_encoding(encoding);
 								    VERIFY(standardized_encoding.has_value());
-												LibWeb: Port Document interface from DeprecatedString to String

											
										
										
											2023-09-15 21:46:58 +12:00
+								    m_document->set_encoding(MUST(String::from_utf8(standardized_encoding.value())));
-												test-web: Add ability to change page mid-test

This allows you to not have to write a separate test file
for the same thing but in a different situation.

This doesn't handle when you change the page with location.href
however.

Changes the name of the page load handlers to prevent confusion
with this.
											
										
										
											2020-07-24 21:24:11 +01:00
+								}
-												LibWeb: Make the Rust HTML parser unconditional

Remove the runtime selector between the old C++ tree builder and the new
Rust implementation. Always construct HTML documents and fragments with
the Rust parser now that it matches the existing tests.

Simplify dump-html-tree by dropping the backend option that only made
sense while both parser implementations were available.

											
										
										
											2026-05-16 13:35:37 +02:00
+								HTMLParser::HTMLParser(DOM::Document& document, ParserScriptingMode scripting_mode, ScriptCreatedParser script_created)
 								    : m_scripting_mode(scripting_mode)
-												LibWeb: Track whether HTMLParser is script-created

Add a ScriptCreatedParser flag plumbed through HTMLParser's constructor
and create_for_scripting(). Only document.open()'s parser sets it to
Yes. Document::close() step 3 now checks is_script_created() so it
correctly skips parsers that weren't created via document.open(),
matching the spec.

Previously the check was just `if (!m_parser)`, which incorrectly let
document.close() insert an EOF into a network-driven parser. The bug
was mostly latent because the network parser used to finish quickly,
but it matters once the network parser stays alive for the duration of
a streamed parse.

											
										
										
											2026-04-28 19:47:49 +02:00
+								    , m_script_created(script_created == ScriptCreatedParser::Yes)
-												LibWeb: Stop creating transient throwaway JS::Handles in HTML parser

These were being immediately stored in JS::GCPtrs (and dutifully visited
by HTMLParser), so creating temporary handles for them was a complete
waste of time.

											
										
										
											2024-07-20 14:38:32 +02:00
+								    , m_document(document)
-												LibWeb: Add basic support for dynamic markup insertion

This implements basic support for dynamic markup insertion, adding
 * Document::open()
 * Document::write(Vector<String> const&)
 * Document::writeln(Vector<String> const&)
 * Document::close()

The HTMLParser is modified to make it possible to create a
script-created parser which initially only contains an HTMLTokenizer
without any data. Additionally, the HTMLParser::run method gains an
overload which does not modify the Document and does not run
HTMLParser::the_end() so that we can reenter the parser at a later time.
Furthermore, all FIXMEs that concern the insertion point, which is
defined in the HTMLTokenizer, are implemented. Additionally, the following
member variables of the HTMLParser are now exposed by getter functions:
 * m_tokenizer
 * m_aborted
 * m_script_nesting_level

The HTMLTokenizer is modified so that it contains an insertion
point which keeps track of where the next input from the Document::write
functions will be inserted. The insertion point is implemented as the
character offset into m_decoded_input and a boolean describing if the
insertion point is defined. Functions to update, check and {re}store the
insertion point are also added.
The function HTMLTokenizer::insert_eof is added to tell a script-created
parser that Document::close was called and HTMLParser::the_end() should
be called.
Lastly, an explicit default constructor is added to HTMLTokenizer to
create an empty HTMLTokenizer into which data can be inserted.

											
										
										
											2022-02-19 15:58:21 +01:00
+								{
-												LibWeb: Make the Rust HTML parser unconditional

Remove the runtime selector between the old C++ tree builder and the new
Rust implementation. Always construct HTML documents and fragments with
the Rust parser now that it matches the existing tests.

Simplify dump-html-tree by dropping the backend option that only made
sense while both parser implementations were available.

											
										
										
											2026-05-16 13:35:37 +02:00
+								    m_rust_parser = rust_html_parser_create();
-												LibWeb: Make document.write() work while document is parsing

This necessitated making HTMLParser ref-counted, and having it register
itself with Document when created. That makes it possible for scripts to
add new input at the current parser insertion point.

There is now a reference cycle between Document and HTMLParser. This
cycle is explicitly broken by calling Document::detach_parser() at the
end of HTMLParser::run().

This is a huge progression on ACID3, from 31% to 49%! :^)

											
										
										
											2022-02-21 21:54:21 +01:00
+								    m_document->set_parser({}, *this);
-												LibWeb: Add basic support for dynamic markup insertion

This implements basic support for dynamic markup insertion, adding
 * Document::open()
 * Document::write(Vector<String> const&)
 * Document::writeln(Vector<String> const&)
 * Document::close()

The HTMLParser is modified to make it possible to create a
script-created parser which initially only contains an HTMLTokenizer
without any data. Additionally, the HTMLParser::run method gains an
overload which does not modify the Document and does not run
HTMLParser::the_end() so that we can reenter the parser at a later time.
Furthermore, all FIXMEs that concern the insertion point, which is
defined in the HTMLTokenizer, are implemented. Additionally, the following
member variables of the HTMLParser are now exposed by getter functions:
 * m_tokenizer
 * m_aborted
 * m_script_nesting_level

The HTMLTokenizer is modified so that it contains an insertion
point which keeps track of where the next input from the Document::write
functions will be inserted. The insertion point is implemented as the
character offset into m_decoded_input and a boolean describing if the
insertion point is defined. Functions to update, check and {re}store the
insertion point are also added.
The function HTMLTokenizer::insert_eof is added to tell a script-created
parser that Document::close was called and HTMLParser::the_end() should
be called.
Lastly, an explicit default constructor is added to HTMLTokenizer to
create an empty HTMLTokenizer into which data can be inserted.

											
										
										
											2022-02-19 15:58:21 +01:00
+								}
-												LibWeb: Add Rust HTML parser host plumbing

Add the C++ and Rust scaffolding that lets the tree builder live in
Rust while the DOM remains owned by LibWeb. Keep the exported surface
small: Rust stores parser state, and C++ provides node creation,
insertion, script, template, and GC hooks.

Route dump-html-tree through the selectable parser backend so the new
implementation can be exercised beside the existing parser while it is
being brought up.

											
										
										
											2026-05-15 20:29:23 +02:00
+								HTMLParser::~HTMLParser() = default;
 								void HTMLParser::finalize()
-												LibWeb: Start building the tree building part of the new HTML parser

This patch adds a new HTMLDocumentParser class. It keeps a tokenizer
object internally and feeds itself with one token at a time from it.

The names and idioms in this class are expressed as closely to the
actual HTML parsing spec as possible, to make development as easy
and bug free as possible. :^)

This is going to become pretty large, but it's pretty cool!

											
										
										
											2020-05-24 00:14:23 +02:00
+								{
-												LibWeb: Add Rust HTML parser host plumbing

Add the C++ and Rust scaffolding that lets the tree builder live in
Rust while the DOM remains owned by LibWeb. Keep the exported surface
small: Rust stores parser state, and C++ provides node creation,
insertion, script, template, and GC hooks.

Route dump-html-tree through the selectable parser backend so the new
implementation can be exercised beside the existing parser while it is
being brought up.

											
										
										
											2026-05-15 20:29:23 +02:00
+								    Base::finalize();
 								    if (m_rust_parser) {
 								        rust_html_parser_destroy(m_rust_parser);
 								        m_rust_parser = nullptr;
 								    }
-												LibWeb: Start building the tree building part of the new HTML parser

This patch adds a new HTMLDocumentParser class. It keeps a tokenizer
object internally and feeds itself with one token at a time from it.

The names and idioms in this class are expressed as closely to the
actual HTML parsing spec as possible, to make development as easy
and bug free as possible. :^)

This is going to become pretty large, but it's pretty cool!

											
										
										
											2020-05-24 00:14:23 +02:00
+								}
-												LibWeb: Make the HTMLParser GC-allocated

This prevents a reference cycle between a HTMLParser opened via
document.open() and the document. It was one of many things keeping
some documents alive indefinitely.

											
										
										
											2022-10-17 10:46:11 +02:00
+								void HTMLParser::visit_edges(Cell::Visitor& visitor)
 								{
 								    Base::visit_edges(visitor);
 								    visitor.visit(m_document);
 								    visitor.visit(m_form_element);
 								    visitor.visit(m_context_element);
-												LibWeb: Implement the speculative HTML parser

When the HTML parser blocks on a synchronous external script, run a
separate tokenizer over the unparsed input and issue speculative fetches
for the resources it finds (script src, link rel=stylesheet|preload, img
src), with <base href> tracking and template/foreign-content skipping.

Also fills in the previously-stubbed "consume a preloaded resource"
algorithm and the document's "map of preloaded resources", so that
<link rel="preload"> followed by a matching consumer deduplicates to
a single fetch.

											
										
										
											2026-04-26 03:21:39 +02:00
+								    visitor.visit(m_active_speculative_html_parser);
-												LibWeb: Make the HTMLParser GC-allocated

This prevents a reference cycle between a HTMLParser opened via
document.open() and the document. It was one of many things keeping
some documents alive indefinitely.

											
										
										
											2022-10-17 10:46:11 +02:00
-												LibWeb: Make the Rust HTML parser unconditional

Remove the runtime selector between the old C++ tree builder and the new
Rust implementation. Always construct HTML documents and fragments with
the Rust parser now that it matches the existing tests.

Simplify dump-html-tree by dropping the backend option that only made
sense while both parser implementations were available.

											
										
										
											2026-05-16 13:35:37 +02:00
+								    rust_html_parser_visit_edges(m_rust_parser, &visitor);
-												LibWeb: Make the HTMLParser GC-allocated

This prevents a reference cycle between a HTMLParser opened via
document.open() and the document. It was one of many things keeping
some documents alive indefinitely.

											
										
										
											2022-10-17 10:46:11 +02:00
+								}
-												LibWeb: Store a SpeculativeHTMLParser on the HTML Parser

The parser was previously added, but unused. Actually attaching one to
the HTML Parser will let us test the limits of Swift interop.

											
										
										
											2025-04-04 09:20:27 -06:00
+								void HTMLParser::initialize(JS::Realm& realm)
 								{
 								    // No parser-specific setup happens here; this only forwards to the base class.
 								    Base::initialize(realm);
 								}
-												LibWeb: Stop parsing after `document.write` at the insertion point

If a call to `document.write` inserts an incomplete HTML tag, e.g.:

    document.write("<p");

we would previously continue parsing the document until we reached a
closing angle bracket. However, the spec states we should stop once we
reach the new insertion point.

											
										
										
											2024-02-18 12:45:53 -05:00
+								void HTMLParser::run(HTMLTokenizer::StopAtInsertionPoint stop_at_insertion_point)
-												LibWeb: Start building the tree building part of the new HTML parser

This patch adds a new HTMLDocumentParser class. It keeps a tokenizer
object internally and feeds itself with one token at a time from it.

The names and idioms in this class are expressed as closely to the
actual HTML parsing spec as possible, to make development as easy
and bug free as possible. :^)

This is going to become pretty large, but it's pretty cool!

											
										
										
											2020-05-24 00:14:23 +02:00
+								{
-												LibWeb: Reset the "stop parsing" flag when entering HTML parser

Otherwise we'll always bail after processing one token, which is not
what we want.

											
										
										
											2024-11-23 16:28:35 +01:00
+								    m_stop_parsing = false;
-												LibWeb: Start building the tree building part of the new HTML parser

This patch adds a new HTMLDocumentParser class. It keeps a tokenizer
object internally and feeds itself with one token at a time from it.

The names and idioms in this class are expressed as closely to the
actual HTML parsing spec as possible, to make development as easy
and bug free as possible. :^)

This is going to become pretty large, but it's pretty cool!

											
										
										
											2020-05-24 00:14:23 +02:00
+								    for (;;) {
-												LibWeb: Replace spin_until in HTMLParser::handle_text with async resume

Spinning a nested event loop to wait for a parser-blocking script blocks
the calling thread, can deadlock, and creates reentrancy hazards. Switch
to an event-driven pause/resume model, mirroring the prior
HTMLParserEndState refactor (df96b69e7a).

Three WPT document.write tests flip from Fail to Pass and are
rebaselined: all write an external script via document.write() followed
by inline content. With spin_until, control did not return to the caller
of document.write() between writing the script and observing its effects
so the test's order assertions saw a different sequence than the spec
mandates.

											
										
										
											2026-04-25 23:59:12 +02:00
+								        if (m_parser_pause_flag)
 								            break;
-												LibWeb: Make the Rust HTML parser unconditional

Remove the runtime selector between the old C++ tree builder and the new
Rust implementation. Always construct HTML documents and fragments with
the Rust parser now that it matches the existing tests.

Simplify dump-html-tree by dropping the backend option that only made
sense while both parser implementations were available.

											
										
										
											2026-05-16 13:35:37 +02:00
+								        auto result = rust_html_parser_run_document(
 								            m_rust_parser,
 								            m_tokenizer.ffi_handle({}),
 								            this,
 								            m_scripting_mode != ParserScriptingMode::Disabled,
 								            stop_at_insertion_point == HTMLTokenizer::StopAtInsertionPoint::Yes);
 								        if (result == RustFfiHtmlParserRunResult::Ok)
-												LibWeb: Fire a DOMContentLoaded event when the new parser is finished

With this change, we can finally load and render welcome.html :^)

											
										
										
											2020-05-27 23:32:50 +02:00
+								            break;
-												LibWeb: Start building the tree building part of the new HTML parser

This patch adds a new HTMLDocumentParser class. It keeps a tokenizer
object internally and feeds itself with one token at a time from it.

The names and idioms in this class are expressed as closely to the
actual HTML parsing spec as possible, to make development as easy
and bug free as possible. :^)

This is going to become pretty large, but it's pretty cool!

											
										
										
											2020-05-24 00:14:23 +02:00
-												LibWeb: Make the Rust HTML parser unconditional

Remove the runtime selector between the old C++ tree builder and the new
Rust implementation. Always construct HTML documents and fragments with
the Rust parser now that it matches the existing tests.

Simplify dump-html-tree by dropping the backend option that only made
sense while both parser implementations were available.

											
										
										
											2026-05-16 13:35:37 +02:00
+								        if (result == RustFfiHtmlParserRunResult::ExecuteScript) {
 								            auto script = rust_html_parser_take_pending_script(m_rust_parser);
 								            VERIFY(script);
 								            process_script_end_tag_from_rust_parser(as<HTMLScriptElement>(node_from_html_parser_ffi(script)));
 								            continue;
-												LibWeb: Remember when HTML parser should ignore next line feed character

There's a quirk in HTML where the parser should ignore any line feed
character immediately following a `pre` or `textarea` start tag.

This was working fine when we could peek ahead in the input stream and
see the next token, but didn't work in character-at-a-time parsing with
document.write().

This commit adds the "can ignore next line feed character" as a parser
flag that is maintained across invocations, making it work in this
parsing mode as well.

20 new passes in WPT/html/syntax/parsing/ :^)

											
										
										
											2025-02-19 16:54:28 +01:00
+								        }
-												LibWeb: Make the Rust HTML parser unconditional

Remove the runtime selector between the old C++ tree builder and the new
Rust implementation. Always construct HTML documents and fragments with
the Rust parser now that it matches the existing tests.

Simplify dump-html-tree by dropping the backend option that only made
sense while both parser implementations were available.

											
										
										
											2026-05-16 13:35:37 +02:00
+								        if (result == RustFfiHtmlParserRunResult::ExecuteSvgScript) {
 								            auto script = rust_html_parser_take_pending_svg_script(m_rust_parser);
 								            VERIFY(script);
 								            if (process_svg_script_end_tag_from_rust_parser(as<SVG::SVGScriptElement>(node_from_html_parser_ffi(script))))
 								                break;
 								            continue;
-												LibWeb: Add initial implementation of foreign content parsing

Plus sneak in a FIXME for the list of active formatting elements
and a test for Element.namespaceURI
											
										
										
											2020-10-12 01:51:28 +01:00
+								        }
-												LibWeb: Add a way to stop the new HTML parser

Some things are specced to "stop parsing", which basically just means
to stop fetching tokens and jump to "The end"

											
										
										
											2020-05-28 18:55:18 +02:00
-												LibWeb: Make the Rust HTML parser unconditional

Remove the runtime selector between the old C++ tree builder and the new
Rust implementation. Always construct HTML documents and fragments with
the Rust parser now that it matches the existing tests.

Simplify dump-html-tree by dropping the backend option that only made
sense while both parser implementations were available.

											
										
										
											2026-05-16 13:35:37 +02:00
+								        VERIFY_NOT_REACHED();
-												LibWeb: Start implementing character token parsing

Now that we've gotten rid of the misguided character buffering in the
tokenizer, it actually spits out character tokens that we have to deal
with in the parser.

This patch implements enough to bring us back to speed with simple.html

											
										
										
											2020-05-24 19:51:50 +02:00
+								    }
-												LibWeb: Fire a DOMContentLoaded event when the new parser is finished

With this change, we can finally load and render welcome.html :^)

											
										
										
											2020-05-27 23:32:50 +02:00
-												LibWeb: Throw out decoded UTF-32 data in HTMLTokenizer after parser runs

This ends up saving quite a bit of memory on many pages, since UTF-32
uses 4 bytes per code points.

As an example, it reduces the footprint on https://gymgrossisten.com/
by 2 MiB.

											
										
										
											2025-10-23 21:45:00 +02:00
+								    m_tokenizer.parser_did_run({});
-												LibWeb: Add basic support for dynamic markup insertion

This implements basic support for dynamic markup insertion, adding
 * Document::open()
 * Document::write(Vector<String> const&)
 * Document::writeln(Vector<String> const&)
 * Document::close()

The HTMLParser is modified to make it possible to create a
script-created parser which initially only contains a HTMLTokenizer
without any data. Additionally the HTMLParser::run method gains an
overload which does not modify the Document and does not run
HTMLParser::the_end() so that we can reenter the parser at a later time.
Furthermore, all FIXMEs that concern the insertion point (which is
defined in the HTMLTokenizer) are implemented. Additionally, the following
member variables of the HTMLParser are now exposed by getter functions:
 * m_tokenizer
 * m_aborted
 * m_script_nesting_level

The HTMLTokenizer is modified so that it contains an insertion
point which keeps track of where the next input from the Document::write
functions will be inserted. The insertion point is implemented as the
character offset into m_decoded_input and a boolean describing if the
insertion point is defined. Functions to update, check and {re}store the
insertion point are also added.
The function HTMLTokenizer::insert_eof is added to tell a script-created
parser that document::close was called and HTMLParser::the_end() should
be called.
Lastly an explicit default constructor is added to HTMLTokenizer to
create an empty HTMLTokenizer into which data can be inserted.

											
										
										
											2022-02-19 15:58:21 +01:00
+								}
-												LibWeb: Buffer text node character insertions in the new parser

Instead of appending character-at-a-time, we now buffer character
insertions in a StringBuilder, and flush them to the relevant node
whenever we start inserting into a new node (and when parsing ends.)

											
										
										
											2020-06-03 21:53:08 +02:00
-												Everywhere: Change west consts caught by clang-format-21 to east consts

											
										
										
											2025-08-29 13:02:52 +01:00
+								void HTMLParser::run(URL::URL const& url, HTMLTokenizer::StopAtInsertionPoint stop_at_insertion_point)
-												LibWeb: Add basic support for dynamic markup insertion

This implements basic support for dynamic markup insertion, adding
 * Document::open()
 * Document::write(Vector<String> const&)
 * Document::writeln(Vector<String> const&)
 * Document::close()

The HTMLParser is modified to make it possible to create a
script-created parser which initially only contains a HTMLTokenizer
without any data. Additionally the HTMLParser::run method gains an
overload which does not modify the Document and does not run
HTMLParser::the_end() so that we can reenter the parser at a later time.
Furthermore, all FIXMEs that concern the insertion point (which is
defined in the HTMLTokenizer) are implemented. Additionally, the following
member variables of the HTMLParser are now exposed by getter functions:
 * m_tokenizer
 * m_aborted
 * m_script_nesting_level

The HTMLTokenizer is modified so that it contains an insertion
point which keeps track of where the next input from the Document::write
functions will be inserted. The insertion point is implemented as the
character offset into m_decoded_input and a boolean describing if the
insertion point is defined. Functions to update, check and {re}store the
insertion point are also added.
The function HTMLTokenizer::insert_eof is added to tell a script-created
parser that document::close was called and HTMLParser::the_end() should
be called.
Lastly an explicit default constructor is added to HTMLTokenizer to
create an empty HTMLTokenizer into which data can be inserted.

											
										
										
											2022-02-19 15:58:21 +01:00
+								{
 								    m_document->set_url(url);
-												LibWeb: Let HTMLTokenizer walk over code points instead of UTF-8

Instead of using UTF-8 iterators to traverse the HTMLTokenizer input
stream one code point at a time, we now do a one-shot conversion up
front from the input encoding to a Vector<u32> of Unicode code points.

This simplifies the tokenizer logic somewhat, and ends up being faster
as well, so win-win.

1.02x speedup on Speedometer 2.1

											
										
										
											2025-05-10 11:28:35 +02:00
+								    m_document->set_source(m_tokenizer.source());
-												LibWeb: Extract HTMLParser::run_until_completion()

Pull the post-parse-action setup, run loop, and post-parse invocation
out of HTMLParser::run(URL, ...) into a new run_until_completion()
method. The URL overload still calls it; behavior is unchanged. The
incremental parser will use this entry point directly without going
through the URL-setting overload.

											
										
										
											2026-04-28 19:49:17 +02:00
+								    run_until_completion(stop_at_insertion_point);
 								}
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
+								void HTMLParser::pop_all_open_elements()
 								{
-												LibWeb: Make the Rust HTML parser unconditional

Remove the runtime selector between the old C++ tree builder and the new
Rust implementation. Always construct HTML documents and fragments with
the Rust parser now that it matches the existing tests.

Simplify dump-html-tree by dropping the backend option that only made
sense while both parser implementations were available.

											
										
										
											2026-05-16 13:35:37 +02:00
+								    rust_html_parser_pop_all_open_elements(m_rust_parser);
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
+								}
-												LibWeb: Wire Rust parser scripts and fragments

Preserve Rust parser state across tokenizer runs and stop cleanly when
a parser-blocking script has to execute. Thread the pending script back
through the existing C++ parser entry point so document.write(), input
insertion points, and script bookkeeping continue to use the normal
LibWeb machinery.

Add the fragment parser setup needed by innerHTML and contextual
fragment parsing, including context elements, form ownership, tokenizer
state selection, text coalescing, and foreign-content integration.

											
										
										
											2026-05-15 20:57:22 +02:00
+								void HTMLParser::configure_element_created_by_rust_parser(DOM::Element& element)
 								{
 								    if (element.local_name() == HTML::TagNames::link && element.namespace_uri() == Namespace::HTML) {
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
+								        // AD-HOC: Let <link> elements know which document they were originally parsed for.
 								        //         This is used for the render-blocking logic.
-												LibWeb: Wire Rust parser scripts and fragments

Preserve Rust parser state across tokenizer runs and stop cleanly when
a parser-blocking script has to execute. Thread the pending script back
through the existing C++ parser entry point so document.write(), input
insertion points, and script bookkeeping continue to use the normal
LibWeb machinery.

Add the fragment parser setup needed by innerHTML and contextual
fragment parsing, including context elements, form ownership, tokenizer
state selection, text coalescing, and foreign-content integration.

											
										
										
											2026-05-15 20:57:22 +02:00
+								        auto& link_element = as<HTMLLinkElement>(element);
 								        link_element.set_parser_document({}, document());
 								        link_element.set_was_enabled_when_created_by_parser({}, !element.has_attribute(HTML::AttributeNames::disabled));
 								        return;
 								    }
 								    if (element.local_name() != HTML::TagNames::script || element.namespace_uri() != Namespace::HTML)
 								        return;
 								    auto& script_element = as<HTMLScriptElement>(element);
 								    if (m_scripting_mode != ParserScriptingMode::Fragment)
 								        script_element.set_parser_document(Badge<HTMLParser> {}, document());
 								    script_element.set_force_async(Badge<HTMLParser> {}, false);
 								    if (m_scripting_mode == ParserScriptingMode::Inert)
 								        script_element.set_already_started(Badge<HTMLParser> {}, true);
 								}
 								GC::Ref<DOM::Element> HTMLParser::create_element_for_rust_parser(HTMLToken const& token, Optional<FlyString> const& namespace_, DOM::Node& intended_parent, bool had_duplicate_attribute, GC::Ptr<HTMLFormElement> form_element, bool has_template_element_on_stack)
 								{
 								    auto element = create_element_for(token, namespace_, intended_parent);
 								    configure_element_created_by_rust_parser(element);
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
+								    // AD-HOC: See AD-HOC comment on Element.m_had_duplicate_attribute_during_tokenization about why this is done.
-												LibWeb: Wire Rust parser scripts and fragments

Preserve Rust parser state across tokenizer runs and stop cleanly when
a parser-blocking script has to execute. Thread the pending script back
through the existing C++ parser entry point so document.write(), input
insertion points, and script bookkeeping continue to use the normal
LibWeb machinery.

Add the fragment parser setup needed by innerHTML and contextual
fragment parsing, including context elements, form ownership, tokenizer
state selection, text coalescing, and foreign-content integration.

											
										
										
											2026-05-15 20:57:22 +02:00
+								    if (had_duplicate_attribute)
 								        element->set_had_duplicate_attribute_during_tokenization({});
 								    if (form_element && !has_template_element_on_stack) {
 								        auto* html_element = as_if<HTML::HTMLElement>(*element);
 								        if (html_element && html_element->is_form_associated_element() && !html_element->is_form_associated_custom_element()) {
 								            if ((!html_element->is_listed() || !html_element->has_attribute(HTML::AttributeNames::form))
 								                && &intended_parent.root() == &form_element->root()) {
 								                html_element->set_form(form_element.ptr());
 								                html_element->set_parser_inserted({});
 								            }
 								        }
 								    }
 								    return element;
 								}
 								// Runs the host-side steps for the end tag of an HTML <script> element: updates the tokenizer's insertion
 								// point and the script nesting level around script.prepare_script(), then decides whether the parser has to
 								// pause because the document has a pending parsing-blocking script.
 								//
 								// Returns true from the nested-invocation branch below; otherwise returns the value of m_parser_pause_flag
 								// at exit.
 								bool HTMLParser::process_script_end_tag_from_rust_parser(HTMLScriptElement& script)
 								{
 								    // If the active speculative HTML parser is null and the JavaScript execution context stack is empty, then perform a microtask checkpoint.
 								    // The active speculative HTML parser is null here; start/stop are paired around the spin_until below.
 								    auto& vm = main_thread_event_loop().vm();
 								    if (!vm.has_running_execution_context())
 								        perform_a_microtask_checkpoint();
 								    // Let the old insertion point have the same value as the current insertion point.
 								    m_tokenizer.store_old_insertion_point();
 								    // Let the insertion point be just before the next input character.
 								    m_tokenizer.update_insertion_point();
 								    // Increment the parser's script nesting level by one.
 								    increment_script_nesting_level();
 								    // https://w3c.github.io/trusted-types/dist/spec/#setting-slot-values-from-parser
 								    // Set script’s script text value to its child text content.
 								    script.set_string_text(script.child_text_content());
 								    // If the active speculative HTML parser is null, then prepare the script element script.
 								    // This might cause some script to execute, which might cause new characters to be inserted into the tokenizer,
 								    // and might cause the tokenizer to output more tokens, resulting in a reentrant invocation of the parser.
 								    // The active speculative HTML parser is null here (see above).
 								    script.prepare_script(Badge<HTMLParser> {});
 								    // Decrement the parser's script nesting level by one.
 								    decrement_script_nesting_level();
 								    // If the parser's script nesting level is zero, then set the parser pause flag to false.
 								    if (script_nesting_level() == 0)
 								        m_parser_pause_flag = false;
 								    // Let the insertion point have the value of the old insertion point.
 								    m_tokenizer.restore_old_insertion_point();
 								    // At this stage, if the pending parsing-blocking script is not null, then:
 								    if (document().pending_parsing_blocking_script()) {
 								        // -> If the script nesting level is not zero:
 								        if (script_nesting_level() != 0) {
 								            // Set the parser pause flag to true,
 								            m_parser_pause_flag = true;
 								            // and abort the processing of any nested invocations of the tokenizer, yielding control back to the caller.
 								            // (Tokenization will resume when the caller returns to the "outer" tree construction stage.)
 								            return true;
 								        }
 								        // -> Otherwise:
 								        // The spec's "While the pending parsing-blocking script is not null" loop and the contained "spin the event
 								        // loop" step are implemented asynchronously: pause the parser, schedule a resume check, and yield back to
 								        // the caller. The remaining steps (4-13) run from resume_after_parser_blocking_script when the script is
 								        // ready.
 								        // 3. Start the speculative HTML parser for this instance of the HTML parser.
 								        start_the_speculative_html_parser();
 								        m_parser_pause_flag = true;
 								        schedule_resume_check();
 								    }
 								    // No pending parsing-blocking script (or the pause was just scheduled above): report the current pause state.
 								    return m_parser_pause_flag;
 								}
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
+								void HTMLParser::prepare_svg_script_for_rust_parser(SVG::SVGScriptElement& script, size_t source_line_number)
 								{
 								    // https://html.spec.whatwg.org/multipage/parsing.html#scripting-mode
 								    // In the Fragment scripting mode, parser-inserted scripts are treated as if they were not parser-inserted
 								    // (this is what lets a fragment made by createContextualFragment() execute its scripts), so the flag is
 								    // left unset in that mode.
 								    bool const is_fragment_mode = m_scripting_mode == ParserScriptingMode::Fragment;

 								    // AD-HOC: The parser-inserted flag of an SVG script element has to be set before the element is inserted into
 								    //         the DOM. Otherwise inserted()/attribute_changed() would invoke process_the_script_element() with the
 								    //         flag still unset and bypass the parser-blocking fetch handling.
 								    if (!is_fragment_mode)
 								        script.set_parser_inserted({});

 								    // Remember the source line the script came from.
 								    script.set_source_line_number({}, source_line_number);
 								}
 								void HTMLParser::set_script_source_line_from_rust_parser(DOM::Element& element, size_t source_line_number)
 								{
 								    // Store the line number on the element if it is an HTML or an SVG script element (checked in that order).
 								    // Any other kind of element is left untouched.
 								    if (auto* html_script = as_if<HTML::HTMLScriptElement>(element))
 								        html_script->set_source_line_number({}, source_line_number);
 								    else if (auto* svg_script = as_if<SVG::SVGScriptElement>(element))
 								        svg_script->set_source_line_number({}, source_line_number);
 								}
 								// Sets the "already started" state of the given HTML script element to true, using the HTMLParser badge.
 								void HTMLParser::mark_script_already_started_from_rust_parser(HTMLScriptElement& script)
 								{
 								    script.set_already_started(Badge<HTMLParser> {}, true);
 								}
 								// Thin forwarding entry point: simply calls stop_parsing() on this parser.
 								void HTMLParser::stop_parsing_from_rust_parser()
 								{
 								    stop_parsing();
 								}
 								// Runs the host-side steps for the end tag of an SVG <script> element: updates the tokenizer's insertion point,
 								// the script nesting level and the parser pause flag around script.process_the_script_element(), then pauses
 								// the parser if the document reports a pending parsing-blocking SVG script.
 								//
 								// Returns the value of m_parser_pause_flag at exit.
 								bool HTMLParser::process_svg_script_end_tag_from_rust_parser(SVG::SVGScriptElement& script)
 								{
 								    // Let the old insertion point have the same value as the current insertion point.
 								    m_tokenizer.store_old_insertion_point();
 								    // Let the insertion point be just before the next input character.
 								    m_tokenizer.update_insertion_point();
 								    // Increment the parser's script nesting level by one.
 								    increment_script_nesting_level();
 								    // Set the parser pause flag to true.
 								    m_parser_pause_flag = true;
 								    // If the active speculative HTML parser is null and the user agent supports SVG, then Process the SVG script element according to the SVG rules. [SVG]
 								    // The active speculative HTML parser is null here.
 								    script.process_the_script_element();
 								    // Decrement the parser's script nesting level by one.
 								    decrement_script_nesting_level();
 								    // If the parser's script nesting level is zero, then set the parser pause flag to false.
 								    if (script_nesting_level() == 0)
 								        m_parser_pause_flag = false;
 								    // Let the insertion point have the value of the old insertion point.
 								    m_tokenizer.restore_old_insertion_point();
 								    // If the SVG script registered itself as a pending parsing-blocking script (external fetch in flight),
 								    // pause the parser and schedule a resume check. The parser will resume from
 								    // resume_after_parser_blocking_script when the fetch completes.
 								    if (document().pending_parsing_blocking_svg_script()) {
 								        m_parser_pause_flag = true;
 								        schedule_resume_check();
 								    }
 								    // The flag is still true here if the nesting level stayed non-zero above or a pause was just scheduled.
 								    return m_parser_pause_flag;
 								}
-												LibWeb: Extract HTMLParser::run_until_completion()

Pull the post-parse-action setup, run loop, and post-parse invocation
out of HTMLParser::run(URL, ...) into a new run_until_completion()
method. The URL overload still calls it; behavior is unchanged. The
incremental parser will use this entry point directly without going
through the URL-setting overload.

											
										
										
											2026-04-28 19:49:17 +02:00
+								void HTMLParser::run_until_completion(HTMLTokenizer::StopAtInsertionPoint stop_at_insertion_point)
 								{
-												LibWeb: Replace spin_until in HTMLParser::handle_text with async resume

Spinning a nested event loop to wait for a parser-blocking script blocks
the calling thread, can deadlock, and creates reentrancy hazards. Switch
to an event-driven pause/resume model, mirroring the prior
HTMLParserEndState refactor (df96b69e7a).

Three WPT document.write tests flip from Fail to Pass and are
rebaselined: all write an external script via document.write() followed
by inline content. With spin_until, control did not return to the caller
of document.write() between writing the script and observing its effects
so the test's order assertions saw a different sequence than the spec
mandates.

											
										
										
											2026-04-25 23:59:12 +02:00
+								    m_post_parse_action = [this] { the_end(*m_document, this); };
-												LibWeb: Stop parsing after `document.write` at the insertion point

If a call to `document.write` inserts an incomplete HTML tag, e.g.:

    document.write("<p");

we would previously continue parsing the document until we reached a
closing angle bracket. However, the spec states we should stop once we
reach the new insertion point.

											
										
										
											2024-02-18 12:45:53 -05:00
+								    run(stop_at_insertion_point);
-												LibWeb: Replace spin_until in HTMLParser::handle_text with async resume

Spinning a nested event loop to wait for a parser-blocking script blocks
the calling thread, can deadlock, and creates reentrancy hazards. Switch
to an event-driven pause/resume model, mirroring the prior
HTMLParserEndState refactor (df96b69e7a).

Three WPT document.write tests flip from Fail to Pass and are
rebaselined: all write an external script via document.write() followed
by inline content. With spin_until, control did not return to the caller
of document.write() between writing the script and observing its effects
so the test's order assertions saw a different sequence than the spec
mandates.

											
										
										
											2026-04-25 23:59:12 +02:00
+								    if (!m_parser_pause_flag)
 								        invoke_post_parse_action();
-												LibWeb: Split out "The end" from the HTML parsing spec to a function

Also add a spec link and some comments.

											
										
										
											2021-09-26 00:00:00 +02:00
+								}
 								// https://html.spec.whatwg.org/multipage/parsing.html#the-end
-												LibGC+Everywhere: Factor out a LibGC from LibJS

Resulting in a massive rename across almost everywhere! Alongside the
namespace change, we now have the following names:

 * JS::NonnullGCPtr -> GC::Ref
 * JS::GCPtr -> GC::Ptr
 * JS::HeapFunction -> GC::Function
 * JS::CellImpl -> GC::Cell
 * JS::Handle -> GC::Root

											
										
										
											2024-11-15 04:01:23 +13:00
+								void HTMLParser::the_end(GC::Ref<DOM::Document> document, GC::Ptr<HTMLParser> parser)
-												LibWeb: Split out "The end" from the HTML parsing spec to a function

Also add a spec link and some comments.

											
										
										
											2021-09-26 00:00:00 +02:00
+								{
 								    // Once the user agent stops parsing the document, the user agent must run the following steps:
-												LibWeb: Make HTMLParser::the_end() callable from outside

This is a little awkward: The spec requires when loading media documents
or ones that don't have a DOM, that we "act as if the user agent had
stopped parsing document" which means following this algorithm. Only a
few steps require an HTMLParser, but those that do, involve reaching
into its internals. The simplest solution I could think of (other than
duplicating this fairly hefty function) is making it static and taking
a Document and optional HTMLParser as parameters.

											
										
										
											2023-12-19 12:51:34 +00:00
+								    // NOTE: This is a static method because the spec sometimes wants us to "act as if the user agent had stopped
 								    //       parsing document" which means running these steps without an HTML Parser. That makes it awkward to call,
 								    //       but it's preferable to duplicating so much code.
 								    if (parser)
 								        VERIFY(document == parser->m_document);
-												LibWeb: Return from "the end" during HTML fragment parsing

This will examine the algorithm known as "the end" from the HTML
specification, which executes when parsing HTML markup has completed,
and its potential to observably run script or change certain
attributes.

This currently executes in our engine when parsing HTML received from
the internet during navigation, using document.{open,write,close},
setting the innerHTML attribute or using DOMParser. The latter two are
only possible by executing script.

This has been causing some issues in our engine, which will be shown
later, so we are considering removing the call to "the end" for these
two cases.

Spoiler: the implications of running "the end" for DOMParser will be
considered in the future. It is the only script-created HTML/XML parser
remaining after this commit that uses "the end", including its XML
variant implemented as XMLDocumentBuilder::document_end().

This will only focus on setting the innerHTML attribute, which falls
under "HTML fragment parsing", which starts here in the specification:
https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments
https://github.com/SerenityOS/serenity/blob/44dd8247647474df95137452b3c9cad9b83326be/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp#L3491

While you may notice our HTMLParser::parse_html_fragment returns `void`
and assume this means no scripts are executed because of our use of
`WebIDL::ExceptionOr<T>` and `JS::ThrowCompletionOr<T>`, note that
dispatched events will execute arbitrary script via a callback, catch
any exceptions, report them and not propagate them. This means that
while a function does not return an exception type, it can still
potentially execute script.

A breakdown of the steps of "the end" in the context of HTML fragment
parsing and its observability follows:
https://html.spec.whatwg.org/multipage/parsing.html#the-end
https://github.com/SerenityOS/serenity/blob/44dd8247647474df95137452b3c9cad9b83326be/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp#L221

1. No-op, as we don't currently have speculative HTML parsing. Even if
   we did, we would instantly return after stopping the speculative
   HTML parser anyway.

2. No-op, document.{open,write,close} are not accessible from the
   temporary document.

3. No-op, document.readyState, window.navigation.timing and the
   readystatechange event are not accessible from the created temporary
   document.

4. This is presumably done so that reentrant invocation of the HTML
   parser from document.{write,close} during the firing of the events
   after step 4 ends up parsing from a clean state. This is a no-op, as
   the events after step 4 do not fire and are not accessible.

5. No-op, we set HTMLScriptElement::m_already_started to true when
   creating it whilst parsing an HTML fragment, which causes
   HTMLScriptElement::prepare_script to instantly bail, meaning
   `scripts_to_execute_when_parsing_has_finished` is always empty.

6. No-op, tasks are considered not runnable when the document does not
   have a browsing context, which is always the case in fragment
   parsing. Additionally, window.navigation.timing and the
   DOMContentLoaded event aren't reachable from the temporary document.

7. Almost a no-op, `scripts_to_execute_as_soon_as_possible` is always
   empty for the same reason as step 5. However, this step uses an
   unconditional `spin_until` call, which _is_ observable and causes
   one of the alluded to issues, which will be talked about later.

8. No-op, as delaying the load event has no purpose in this case, as
   the task in step 9 will set the current document readiness to
   "complete" and then return immediately after, as the temporary
   document has no browsing context, skipping the Window load event.
   However, this step causes another alluded to issue, which will be
   talked about later.

9. No-op, for the same reason as step 6. Additionally,
   document.readyState is not accessible from the temporary document
   and the temporary document has no browsing context, so navigation
   timing, the Window load event, the pageshow event, the Document load
   event and the `<iframe>` load steps are not executed at all.

10. No-op, as this flag is only set from window.print(), which is not
    accessible for this document.

11. No-op, as the temporary document is not accessible from anything
    else and will be immediately destroyed after HTML fragment parsing.

Additionally, browsing context containers (`<iframe>`, `<frame>` and
`<object>`) cannot run in documents with no browsing context:

- `<iframe>` and `<frame>` use "create a new child navigable":
https://html.spec.whatwg.org/multipage/document-sequences.html#create-a-new-child-navigable
https://github.com/SerenityOS/serenity/blob/44dd8247647474df95137452b3c9cad9b83326be/Userland/Libraries/LibWeb/HTML/BrowsingContextContainer.cpp#L43-L45

> 2. Let group be element's node document's browsing context's
     top-level browsing context's group.

This requires the element's node document's browsing context to be
non-null, but it is always null with the temporary document created for
HTML fragment parsing.

This is protected against here for `<iframe>`:
https://html.spec.whatwg.org/multipage/iframe-embed-object.html#the-iframe-element:the-iframe-element-6
https://github.com/SerenityOS/serenity/blob/44dd8247647474df95137452b3c9cad9b83326be/Userland/Libraries/LibWeb/HTML/HTMLIFrameElement.cpp#L45

> When an iframe element element is inserted into a document whose
  browsing context is non-null, the user agent must run these steps:
  1. Create a new child navigable for element.

This is currently not protected against for `<frame>` in the
specification:
https://html.spec.whatwg.org/multipage/obsolete.html#active-frame-element

> A frame element is said to be an active frame element when it is in a
  document.

> When a frame element element is created as an active frame element,
  or becomes an active frame element after not having been one, the
  user agent must run these steps:
>     1. Create a new child navigable for element.

However, since this would cause a null dereference, this is actually a
specification issue. See: https://github.com/whatwg/html/issues/9136

- `<object>` uses "queue an element task" and has a browsing context
  null check.
https://html.spec.whatwg.org/multipage/iframe-embed-object.html#the-object-element:queue-an-element-task
https://github.com/SerenityOS/serenity/blob/44dd8247647474df95137452b3c9cad9b83326be/Userland/Libraries/LibWeb/HTML/HTMLObjectElement.cpp#L58
https://github.com/SerenityOS/serenity/blob/44dd8247647474df95137452b3c9cad9b83326be/Userland/Libraries/LibWeb/HTML/HTMLObjectElement.cpp#L105

> ...the user agent must queue an element task on the DOM manipulation
  task source given the object element to run the following steps to
  (re)determine what the object element represents.

As established above, tasks are not runnable in documents with null
browsing contexts. However, for avoidance of doubt, it checks if the
document's browsing context is null, and if so, it falls back to
representing the element's children and gets rid of any child navigable
the `<object>` element may have.

> 2. If the element has an ancestor media element, or has an ancestor
     object element that is not showing its fallback content, or if the
     element is not in a document whose browsing context is non-null,
     or if the element's node document is not fully active, or if the
     element is still in the stack of open elements of an HTML parser
     or XML parser, or if the element is not being rendered, then jump
     to the step below labeled fallback.

> 4. Fallback: The object element represents the element's children.
     This is the element's fallback content. Destroy a child navigable
     given the element.

This check also protects against an `<object>` element being adopted
from a document which has a browsing context to one that doesn't during
the time between the element task being queued and then executed.

This means a browsing context container cannot be run, meaning browsing
context containers cannot access their parent document and access the
properties and events mentioned in steps 1-11 above, or use
document.{open,write,close} on the parent document.

Another potential avenue of running script via HTML fragment parsing
is via custom elements being in the markup, which need to be
synchronously upgraded. For example:
```
<custom-element></custom-element>
```

However, this is already protected against in the spec:
https://html.spec.whatwg.org/multipage/parsing.html#create-an-element-for-the-token
https://github.com/SerenityOS/serenity/blob/44dd8247647474df95137452b3c9cad9b83326be/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp#L643

> 7. If definition is non-null and the parser was not created as part
     of the HTML fragment parsing algorithm, then let will execute
     script be true. Otherwise, let it be false.

It is protected against overall by disabling custom elements via
returning `null` for all custom element definition lookups if the
document has no browsing context, which is the case for the temporary
document:
https://html.spec.whatwg.org/multipage/custom-elements.html#look-up-a-custom-element-definition
https://github.com/SerenityOS/serenity/blob/44dd8247647474df95137452b3c9cad9b83326be/Userland/Libraries/LibWeb/DOM/Document.cpp#L2106-L2108

> 2. If document's browsing context is null, return null.

This is because the document doesn't have an associated Window, meaning
there will be no associated CustomElementRegistry object.

After running the HTML fragment parser, all of the child nodes are
removed from the temporary document and then adopted into the context
element's node document. Skipping the `pre_remove` steps as they are
not relevant in this case, let's first examine Node::remove()'s
potential to execute script, then examine Document::adopt_node() after.
https://dom.spec.whatwg.org/#concept-node-remove
https://github.com/SerenityOS/serenity/blob/44dd8247647474df95137452b3c9cad9b83326be/Userland/Libraries/LibWeb/DOM/Node.cpp#L534

1-7. Does not run any script, it just keeps a copy of some data that
     will be needed later in the algorithm and directly modifies live
     range attributes. However, since this relies on Range objects
     containing the temporary document, the Range steps are no-ops.

8. Though this uses the temporary document, it does not contain any
   NodeIterator objects as no script should have run, thus this
   callback will not be entered. Even if the document _did_ have
   associated NodeIterators, NodeIterator::run_pre_removing_steps does
   not execute any script.

9-11. Does not run any script, it just keeps a copy of some data that
      will be needed later in the algorithm and performs direct tree
      mutation to remove the node from the node tree.

12-14. "assign slottables" and step 13 queue mutation observer
       microtasks via "signal a slot change". However, since this is
       done _after_ running "the end", the "spin the event loop" steps
       in that algorithm do not affect this. Remember that queued
       microtasks do not execute during this algorithm for the next
       few steps.

Sidenote:
Microtasks are supposed to be executed when the JavaScript execution
context stack is empty. Since HTMLParser::parse_html_fragment is only
called from script, the stack will never be empty whilst it is running,
so microtasks will not run until some time after we exit this function.

15. This could potentially run script, let's have a look at the
    removal steps we currently have implemented in our engine:

- HTMLIFrameElement::removed_from()
  https://html.spec.whatwg.org/multipage/iframe-embed-object.html#the-iframe-element:the-iframe-element-7
  https://github.com/SerenityOS/serenity/blob/44cf92616e59bda951b67cdae78a6361bdd76f7a/Userland/Libraries/LibWeb/HTML/HTMLIFrameElement.cpp#L102

  Since browsing context containers cannot create child browsing
  contexts (as shown above), this code will do nothing. This will also
  hold true when we implement HTMLFrameElement::removed_from() in the
  future.

- FormAssociatedElement::removed_from()
  https://github.com/SerenityOS/serenity/blob/44cf92616e59bda951b67cdae78a6361bdd76f7a/Userland/Libraries/LibWeb/HTML/FormAssociatedElement.h#L36
  
  This calls `form_node_was_removed` which can then potentially call
  `reset_form_owner`. However, `reset_form_owner` only does tree
  traversal to find the appropriate form owner and does not execute
  any script. After calling `form_node_was_removed` it then calls
  `form_associated_element_was_removed`, which is a virtual function
  that no one currently overrides, meaning no script is executed.

- HTMLBaseElement::removed_from()
  https://github.com/SerenityOS/serenity/blob/44dd8247647474df95137452b3c9cad9b83326be/Userland/Libraries/LibWeb/HTML/HTMLBaseElement.cpp#L45
  
  This will call `Document::update_base_element` to do tree traversal
  to find out the new first `<base>` element with an href attribute and
  thus does not execute any script.

- HTMLStyleElement::removed_from()
  https://html.spec.whatwg.org/multipage/semantics.html#update-a-style-block
  https://github.com/SerenityOS/serenity/blob/44dd8247647474df95137452b3c9cad9b83326be/Userland/Libraries/LibWeb/HTML/HTMLStyleElement.cpp#L49
  
  This will call `update_a_style_block`, which will parse the `<style>`
  element's text content as CSS and create a style sheet from it. This
  does not execute any script.
  
In summary, step 15 does not currently execute any script and ideally
shouldn't in the future when we implement more `removed_from` steps.

16. Does not run any script, just saves a copy of a variable.

17. Queues a "disconnectedCallback" custom elements callback. This will
    execute script in the future, but not here.
    
18. Performs steps 15 and 17 in combination for each of the node's
    descendants. This will not execute any script.
    
19. Does not run any script, it performs a requirement of mutation
    observers by adding certain things to a list.

20. Does not execute any script, as mutation observer callbacks are
    done via microtasks.

21. This will not execute script, as the parent is always the temporary
    document in HTML fragment parsing. There are no Document children
    changed steps, so this step is a no-op.
    
We then do layout invalidation which is our own addition, but this also
does not execute any script.

In short, removing a node does not execute any script. It could execute
script in the future, but since this is done by tasks, it will not
execute until we are outside of HTMLParser::parse_html_fragment.

Let's look at adopting a node:
https://dom.spec.whatwg.org/#concept-node-adopt
https://github.com/SerenityOS/serenity/blob/44dd8247647474df95137452b3c9cad9b83326be/Userland/Libraries/LibWeb/DOM/Document.cpp#L1414

1. Does not run script, it just keeps a reference to the temporary
   document.

2. No-op, we removed the node above.

3.1. Does not execute script, it simply updates all descendants of
     the removed node to be in the context element's node document.

3.2. Does not execute script, see node removal step 17.

3.3. This could potentially execute script, let's have a look at the
     adopting steps we have implemented in our engine:

- HTMLTemplateElement::adopted_from()
  https://html.spec.whatwg.org/multipage/scripting.html#the-template-element:concept-node-adopt-ext
  https://github.com/SerenityOS/serenity/blob/44dd8247647474df95137452b3c9cad9b83326be/Userland/Libraries/LibWeb/HTML/HTMLTemplateElement.cpp#L38

  This simply adopts the `<template>` element's DocumentFragment node
  into its inert document. This does not execute any script.
  
We then have our own addition of adopting NodeIterators over to the
context element's document, but this does not execute any script.

In short, adopting a node does not execute any script.

After adopting the nodes to the context element's document, HTML
fragment parsing is complete and the temporary document is no longer
accessible at all.

Document and element event handlers are also not accessible, even if
the event bubbles. This is simply because the temporary document is not
accessible, so tree traversal, IDL event handler attributes and
EventTarget#addEventListener are not accessible, on the document or any
descendants. Document is also not an Element, so element event handler
attributes do not apply.

In summary, this establishes that HTML fragment parsers should not run
any user script or internal C++ code that relies on things set up by
"the end". This means that the attributes set up and events fired by
"the end" are not observable in this case. This may not have explored
every single possible avenue, but the general assertion should still
hold. However, this assertion is violated by "the end" containing two
unconditional "spin the event loop" invocations and causes issues with
live web content, so we seek to avoid them.

As WebKit, Blink and Gecko have been able to get away with doing fast
path optimizations for HTML fragment parsing which don't set up
navigation timing, run events, etc. it is presumed we are able to get
away with not running "the end" for HTML fragment parsing as well.
WebKit: https://github.com/WebKit/WebKit/blob/c69be377e17c2977681fef9113d13d91b62d1ee4/Source/WebCore/dom/DocumentFragment.cpp#L90-L98
Blink: https://github.com/chromium/chromium/blob/15444426f98a99830338697cce54e686e988815c/third_party/blink/renderer/core/editing/serializers/serialization.cc#L681-L702
Gecko: https://github.com/mozilla/gecko-dev/blob/6fc2f6d5335fb6f70f780b5fea5ed77b0719c3b5/dom/base/FragmentOrElement.cpp#L1991-L2002

Removing the call to "the end" fixes at least a couple of issues:
- Inserting `<img>` elements via innerHTML causes us to spin forever.

  This regressed in https://github.com/SerenityOS/serenity/commit/2413de7e10efda16f6f376b42b204279d7a25906
  
  This is because `m_load_event_delayer.clear()` is performed inside an
  element task callback. Because of the reasons stated above, this will
  never execute. This caused us to spin forever on step 8 of "the end",
  which is delaying the load event.
  
  This affected Google Docs and Google Maps, never allowing them to
  progress after performing this action. I have also seen it cause a
  Scorecard Research `<img>` beacon in a `<noscript>` element inserted
  via innerHTML to spin forever. This presumably affects many more
  sites as well.
  
  Given that the Window load event is not fired for HTML fragment
  parsers, spinning the event loop to delay the load event does not
  change anything, meaning this step can be skipped entirely.
  
- Microtask timing is messed up by the unconditional `spin_until`s on
  steps 7 and 8.
  
  "Spin the event loop" causes an unconditional microtask checkpoint:
  https://html.spec.whatwg.org/multipage/webappapis.html#spin-the-event-loop
  https://github.com/SerenityOS/serenity/blob/44dd8247647474df95137452b3c9cad9b83326be/Userland/Libraries/LibWeb/HTML/EventLoop/EventLoop.cpp#L54
  
  > 3. Let old stack be a copy of the JavaScript execution context
       stack.
  > 4. Empty the JavaScript execution context stack.
  > 5. Perform a microtask checkpoint.
  > 6.2.1. Replace the JavaScript execution context stack with old
           stack.
           
  This broke YouTube with the introduction of custom elements, as
  custom elements use microtasks to upgrade elements and call
  callbacks. See https://github.com/whatwg/html/issues/8646 for a full
  example reduced from YouTube's JavaScript.
  
  Another potential fix for this issue is to remove the above steps
  from "spin the event loop". However, since we have another issue with
  the use of "spin the event loop", it would be best to just avoid
  both calls to it.

Considering all of the above, removing the call to "the end" is the way
forward for HTML fragment parsing, as all of it should be a no-op.

This is done by simply returning from "the end" if the HTML parser
was created for HTML fragment parsing.

The end.
											
										
										
											2023-04-11 19:19:30 +01:00
+								    // The entirety of "the end" should be a no-op for HTML fragment parsers, because:
 								    // - the temporary document is not accessible, making the DOMContentLoaded event and "ready for post load tasks" do
 								    //   nothing, making the parser not re-entrant from document.{open,write,close} and document.readyState inaccessible
 								    // - there is no Window associated with it and no associated browsing context with the temporary document (meaning
 								    //   the Window load event is skipped and making the load timing info inaccessible)
 								    // - scripts are not able to be prepared, meaning the script queues are empty.
 								    // However, the unconditional "spin the event loop" invocations cause two issues:
 								    // - Microtask timing is changed, as "spin the event loop" performs an unconditional microtask checkpoint, causing
 								    //   things to happen out of order. For example, YouTube sets the innerHTML of a <template> element in the constructor
 								    //   of the ytd-app custom element _before_ setting up class attributes. Since custom elements use microtasks to run
 								    //   callbacks, this causes custom element callbacks that rely on attributes setup by the constructor to run before
 								    //   the attributes are set up, causing unhandled exceptions.
 								    // - Load event delaying can spin forever, e.g. if the fragment contains an <img> element which stops delaying the
 								    //   load event from an element task. Since tasks are not considered runnable if they're from a document with no
 								    //   browsing context (i.e. the temporary document made for innerHTML), the <img> element will forever delay the load
 								    //   event and cause an infinite loop.
 								    // We can avoid these issues and also avoid doing unnecessary work by simply skipping "the end" for HTML fragment
 								    // parsers.
 								    // See the message of the commit that added this for more details.
-												LibWeb: Make HTMLParser::the_end() callable from outside

This is a little awkward: The spec requires when loading media documents
or ones that don't have a DOM, that we "act as if the user agent had
stopped parsing document" which means following this algorithm. Only a
few steps require an HTMLParser, but those that do, involve reaching
into its internals. The simplest solution I could think of (other than
duplicating this fairly hefty function) is making it static and taking
a Document and optional HTMLParser as parameters.

											
										
										
											2023-12-19 12:51:34 +00:00
+								    if (parser && parser->m_parsing_fragment)
-												LibWeb: Return from "the end" during HTML fragment parsing

This will examine the algorithm known as "the end" from the HTML
specification, which executes when parsing HTML markup has completed,
and its potential to observably run script or change certain
attributes.

This currently executes in our engine when parsing HTML received from
the internet during navigation, using document.{open,write,close},
setting the innerHTML attribute or using DOMParser. The latter two are
only possible by executing script.

This has been causing some issues in our engine, which will be shown
later, so we are considering removing the call to "the end" for these
two cases.

Spoiler: the implications of running "the end" for DOMParser will be
considered in the future. It is the only script-created HTML/XML parser
remaining after this commit that uses "the end", including its XML
variant implemented as XMLDocumentBuilder::document_end().

This will only focus on setting the innerHTML attribute, which falls
under "HTML fragment parsing", which starts here in the specification:
https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments
https://github.com/SerenityOS/serenity/blob/44dd8247647474df95137452b3c9cad9b83326be/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp#L3491

While you may notice our HTMLParser::parse_html_fragment returns `void`
and assume this means no scripts are executed because of our use of
`WebIDL::ExceptionOr<T>` and `JS::ThrowCompletionOr<T>`, note that
dispatched events will execute arbitrary script via a callback, catch
any exceptions, report them and not propagate them. This means that
while a function does not return an exception type, it can still
potentially execute script.

A breakdown of the steps of "the end" in the context of HTML fragment
parsing and its observability follows:
https://html.spec.whatwg.org/multipage/parsing.html#the-end
https://github.com/SerenityOS/serenity/blob/44dd8247647474df95137452b3c9cad9b83326be/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp#L221

1. No-op, as we don't currently have speculative HTML parsing. Even if
   we did, we would instantly return after stopping the speculative
   HTML parser anyway.

2. No-op, document.{open,write,close} are not accessible from the
   temporary document.

3. No-op, document.readyState, window.navigation.timing and the
   readystatechange event are not accessible from the created temporary
   document.

4. This is presumably done so that reentrant invocation of the HTML
   parser from document.{write,close} during the firing of the events
   after step 4 ends up parsing from a clean state. This is a no-op, as
   the events after step 4 do not fire and are not accessible.

5. No-op, we set HTMLScriptElement::m_already_started to true when
   creating it whilst parsing an HTML fragment, which causes
   HTMLScriptElement::prepare_script to instantly bail, meaning
   `scripts_to_execute_when_parsing_has_finished` is always empty.

6. No-op, tasks are considered not runnable when the document does not
   have a browsing context, which is always the case in fragment
   parsing. Additionally, window.navigation.timing and the
   DOMContentLoaded event aren't reachable from the temporary document.

7. Almost a no-op, `scripts_to_execute_as_soon_as_possible` is always
   empty for the same reason as step 5. However, this step uses an
   unconditional `spin_until` call, which _is_ observable and causes
   one of the alluded to issues, which will be talked about later.

8. No-op, as delaying the load event has no purpose in this case, as
   the task in step 9 will set the current document readiness to
   "complete" and then return immediately after, as the temporary
   document has no browsing context, skipping the Window load event.
   However, this step causes another alluded to issue, which will be
   talked about later.

9. No-op, for the same reason as step 6. Additionally,
   document.readyState is not accessible from the temporary document
   and the temporary document has no browsing context, so navigation
   timing, the Window load event, the pageshow event, the Document load
   event and the `<iframe>` load steps are not executed at all.

10. No-op, as this flag is only set from window.print(), which is not
    accessible for this document.

11. No-op, as the temporary document is not accessible from anything
    else and will be immediately destroyed after HTML fragment parsing.

Additionally, browsing context containers (`<iframe>`, `<frame>` and
`<object>`) cannot run in documents with no browsing context:

- `<iframe>` and `<frame>` use "create a new child navigable":
https://html.spec.whatwg.org/multipage/document-sequences.html#create-a-new-child-navigable
https://github.com/SerenityOS/serenity/blob/44dd8247647474df95137452b3c9cad9b83326be/Userland/Libraries/LibWeb/HTML/BrowsingContextContainer.cpp#L43-L45

> 2. Let group be element's node document's browsing context's
     top-level browsing context's group.

This requires the element's node document's browsing context to be
non-null, but it is always null with the temporary document created for
HTML fragment parsing.

This is protected against here for `<iframe>`:
https://html.spec.whatwg.org/multipage/iframe-embed-object.html#the-iframe-element:the-iframe-element-6
https://github.com/SerenityOS/serenity/blob/44dd8247647474df95137452b3c9cad9b83326be/Userland/Libraries/LibWeb/HTML/HTMLIFrameElement.cpp#L45

> When an iframe element element is inserted into a document whose
  browsing context is non-null, the user agent must run these steps:
  1. Create a new child navigable for element.

This is currently not protected against for `<frame>` in the
specification:
https://html.spec.whatwg.org/multipage/obsolete.html#active-frame-element

> A frame element is said to be an active frame element when it is in a
  document.

> When a frame element element is created as an active frame element,
  or becomes an active frame element after not having been one, the
  user agent must run these steps:
>     1. Create a new child navigable for element.

However, since this would cause a null dereference, this is actually a
specification issue. See: https://github.com/whatwg/html/issues/9136

- `<object>` uses "queue an element task" and has a browsing context
  null check.
https://html.spec.whatwg.org/multipage/iframe-embed-object.html#the-object-element:queue-an-element-task
https://github.com/SerenityOS/serenity/blob/44dd8247647474df95137452b3c9cad9b83326be/Userland/Libraries/LibWeb/HTML/HTMLObjectElement.cpp#L58
https://github.com/SerenityOS/serenity/blob/44dd8247647474df95137452b3c9cad9b83326be/Userland/Libraries/LibWeb/HTML/HTMLObjectElement.cpp#L105

> ...the user agent must queue an element task on the DOM manipulation
  task source given the object element to run the following steps to
  (re)determine what the object element represents.

As established above, tasks are not runnable in documents with null
browsing contexts. However, for avoidance of doubt, it checks if the
document's browsing context is null, and if so, it falls back to
representing the element's children and gets rid of any child navigable
the `<object>` element may have.

> 2. If the element has an ancestor media element, or has an ancestor
     object element that is not showing its fallback content, or if the
     element is not in a document whose browsing context is non-null,
     or if the element's node document is not fully active, or if the
     element is still in the stack of open elements of an HTML parser
     or XML parser, or if the element is not being rendered, then jump
     to the step below labeled fallback.

> 4. Fallback: The object element represents the element's children.
     This is the element's fallback content. Destroy a child navigable
     given the element.

This check also protects against an `<object>` element being adopted
from a document which has a browsing context to one that doesn't during
the time between the element task being queued and then executed.

This means a browsing context container cannot be run, meaning browsing
context containers cannot access their parent document and access the
properties and events mentioned in steps 1-11 above, or use
document.{open,write,close} on the parent document.

Another potential avenue of running script via HTML fragment parsing
is via custom elements being in the markup, which need to be
synchronously upgraded. For example:
```
<custom-element></custom-element>
```

However, this is already protected against in the spec:
https://html.spec.whatwg.org/multipage/parsing.html#create-an-element-for-the-token
https://github.com/SerenityOS/serenity/blob/44dd8247647474df95137452b3c9cad9b83326be/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp#L643

> 7. If definition is non-null and the parser was not created as part
     of the HTML fragment parsing algorithm, then let will execute
     script be true. Otherwise, let it be false.

It is protected against overall by disabling custom elements via
returning `null` for all custom element definition lookups if the
document has no browsing context, which is the case for the temporary
document:
https://html.spec.whatwg.org/multipage/custom-elements.html#look-up-a-custom-element-definition
https://github.com/SerenityOS/serenity/blob/44dd8247647474df95137452b3c9cad9b83326be/Userland/Libraries/LibWeb/DOM/Document.cpp#L2106-L2108

> 2. If document's browsing context is null, return null.

This is because the document doesn't have an associated Window, meaning
there will be no associated CustomElementRegistry object.

After running the HTML fragment parser, all of the child nodes are
removed from the temporary document and then adopted into the context
element's node document. Skipping the `pre_remove` steps as they are
not relevant in this case, let's first examine Node::remove()'s
potential to execute script, then examine Document::adopt_node() after.
https://dom.spec.whatwg.org/#concept-node-remove
https://github.com/SerenityOS/serenity/blob/44dd8247647474df95137452b3c9cad9b83326be/Userland/Libraries/LibWeb/DOM/Node.cpp#L534

1-7. Does not run any script, it just keeps a copy of some data that
     will be needed later in the algorithm and directly modifies live
     range attributes. However, since this relies on Range objects
     containing the temporary document, the Range steps are no-ops.

8. Though this uses the temporary document, it does not contain any
   NodeIterator objects as no script should have run, thus this
   callback will not be entered. Even if the document _did_ have
   associated NodeIterators, NodeIterator::run_pre_removing_steps does
   not execute any script.

9-11. Does not run any script, it just keeps a copy of some data that
      will be needed later in the algorithm and performs direct tree
      mutation to remove the node from the node tree.

12-14. "assign slottables" and step 13 queue mutation observer
       microtasks via "signal a slot change". However, since this is
       done _after_ running "the end", the "spin the event loop" steps
       in that algorithm do not affect this. Remember that queued
       microtasks do not execute during this algorithm for the next
       few steps.

Sidenote:
Microtasks are supposed to be executed when the JavaScript execution
context stack is empty. Since HTMLParser::parse_html_fragment is only
called from script, the stack will never be empty whilst it is running,
so microtasks will not run until some time after we exit this function.

15. This could potentially run script, let's have a look at the
    removal steps we currently have implemented in our engine:

- HTMLIFrameElement::removed_from()
  https://html.spec.whatwg.org/multipage/iframe-embed-object.html#the-iframe-element:the-iframe-element-7
  https://github.com/SerenityOS/serenity/blob/44cf92616e59bda951b67cdae78a6361bdd76f7a/Userland/Libraries/LibWeb/HTML/HTMLIFrameElement.cpp#L102

  Since browsing context containers cannot create child browsing
  contexts (as shown above), this code will do nothing. This will also
  hold true when we implement HTMLFrameElement::removed_from() in the
  future.

- FormAssociatedElement::removed_from()
  https://github.com/SerenityOS/serenity/blob/44cf92616e59bda951b67cdae78a6361bdd76f7a/Userland/Libraries/LibWeb/HTML/FormAssociatedElement.h#L36
  
  This calls `form_node_was_removed` which can then potentially call
  `reset_form_owner`. However, `reset_form_owner` only does tree
  traversal to find the appropriate form owner and does not execute
  any script. After calling `form_node_was_removed` it then calls
  `form_associated_element_was_removed`, which is a virtual function
  that no one currently overrides, meaning no script is executed.

- HTMLBaseElement::removed_from()
  https://github.com/SerenityOS/serenity/blob/44dd8247647474df95137452b3c9cad9b83326be/Userland/Libraries/LibWeb/HTML/HTMLBaseElement.cpp#L45
  
  This will call `Document::update_base_element` to do tree traversal
  to find out the new first `<base>` element with an href attribute and
  thus does not execute any script.

- HTMLStyleElement::removed_from()
  https://html.spec.whatwg.org/multipage/semantics.html#update-a-style-block
  https://github.com/SerenityOS/serenity/blob/44dd8247647474df95137452b3c9cad9b83326be/Userland/Libraries/LibWeb/HTML/HTMLStyleElement.cpp#L49
  
  This will call `update_a_style_block`, which will parse the `<style>`
  element's text content as CSS and create a style sheet from it. This
  does not execute any script.
  
In summary, step 15 does not currently execute any script and ideally
shouldn't in the future when we implement more `removed_from` steps.

16. Does not run any script, just saves a copy of a variable.

17. Queues a "disconnectedCallback" custom elements callback. This will
    execute script in the future, but not here.
    
18. Performs step 15 and 17 in combination for each of the node's
    descendants. This will not execute any script.
    
19. Does not run any script, it performs a requirement of mutation
    observers by adding certain things to a list.

20. Does not execute any script, as mutation observer callbacks are
    done via microtasks.

21. This will not execute script, as the parent is always the temporary
    document in HTML fragment parsing. There is no Document children
    changed steps, so this step is a no-op.
    
We then do layout invalidation which is our own addition, but this also
does not execute any script.

In short, removing a node does not execute any script. It could execute
script in the future, but since this is done by tasks, it will not
execute until we are outside of HTMLParser::parse_html_fragment.

Let's look at adopting a node:
https://dom.spec.whatwg.org/#concept-node-adopt
https://github.com/SerenityOS/serenity/blob/44dd8247647474df95137452b3c9cad9b83326be/Userland/Libraries/LibWeb/DOM/Document.cpp#L1414

1. Does not run script, it just keeps a reference to the temporary
   document.

2. No-op, we removed the node above.

3.1. Does not execute script, it simply updates all descendants of
     the removed node to be in the context element's node document.

3.2. Does not execute script, see node removal step 17.

3.3. This could potentially execute script, let's have a look at the
     adopting steps we have implemented in our engine:

- HTMLTemplateElement::adopted_from()
  https://html.spec.whatwg.org/multipage/scripting.html#the-template-element:concept-node-adopt-ext
  https://github.com/SerenityOS/serenity/blob/44dd8247647474df95137452b3c9cad9b83326be/Userland/Libraries/LibWeb/HTML/HTMLTemplateElement.cpp#L38

  This simply adopts the `<template>` element's DocumentFragment node
  into its inert document. This does not execute any script.
  
We then have our own addition of adopting NodeIterators over to the
context element's document, but this does not execute any script.

In short, adopting a node does not execute any script.

After adopting the nodes to the context element's document, HTML
fragment parsing is complete and the temporary document is no longer
accessible at all.

Document and element event handlers are also not accessible, even if
the event bubbles. This is simply because the temporary document is not
accessible, so tree traversal, IDL event handler attributes and
EventTarget#addEventListener are not accessible, on the document or any
descendants. Document is also not an Element, so element event handler
attributes do not apply.

In summary, this establishes that HTML fragment parsers should not run
any user script or internal C++ code that relies on things set up by
"the end". This means that the attributes set up and events fired by
"the end" are not observable in this case. This may not have explored
every single possible avenue, but the general assertion should still
hold. However, this assertion is violated by "the end" containing two
unconditional "spin the event loop" invocations and causes issues with
live web content, so we seek to avoid them.

As WebKit, Blink and Gecko have been able to get away with doing fast
path optimizations for HTML fragment parsing which don't set up
navigation timing, run events, etc. it is presumed we are able to get
away with not running "the end" for HTML fragment parsing as well.
WebKit: https://github.com/WebKit/WebKit/blob/c69be377e17c2977681fef9113d13d91b62d1ee4/Source/WebCore/dom/DocumentFragment.cpp#L90-L98
Blink: https://github.com/chromium/chromium/blob/15444426f98a99830338697cce54e686e988815c/third_party/blink/renderer/core/editing/serializers/serialization.cc#L681-L702
Gecko: https://github.com/mozilla/gecko-dev/blob/6fc2f6d5335fb6f70f780b5fea5ed77b0719c3b5/dom/base/FragmentOrElement.cpp#L1991-L2002

Removing the call to "the end" fixes at least a couple of issues:
- Inserting `<img>` elements via innerHTML causes us to spin forever.

  This regressed in https://github.com/SerenityOS/serenity/commit/2413de7e10efda16f6f376b42b204279d7a25906
  
  This is because `m_load_event_delayer.clear()` is performed inside an
  element task callback. Because of the reasons stated above, this will
  never execute. This caused us to spin forever on step 8 of "the end",
  which is delaying the load event.
  
  This affected Google Docs and Google Maps, never allowing them to
  progress after performing this action. I have also seen it cause a
  Scorecard Research `<img>` beacon in a `<noscript>` element inserted
  via innerHTML to spin forever. This presumably affects many more
  sites as well.
  
  Given that the Window load event is not fired for HTML fragment
  parsers, spinning the event loop to delay the load event does not
  change anything, meaning this step can be skipped entirely.
  
- Microtask timing is messed up by the unconditional `spin_until`s on
  steps 7 and 8.
  
  "Spin the event loop" causes an unconditional microtask checkpoint:
  https://html.spec.whatwg.org/multipage/webappapis.html#spin-the-event-loop
  https://github.com/SerenityOS/serenity/blob/44dd8247647474df95137452b3c9cad9b83326be/Userland/Libraries/LibWeb/HTML/EventLoop/EventLoop.cpp#L54
  
  > 3. Let old stack be a copy of the JavaScript execution context
       stack.
  > 4. Empty the JavaScript execution context stack.
  > 5. Perform a microtask checkpoint.
  > 6.2.1. Replace the JavaScript execution context stack with old
           stack.
           
  This broke YouTube with the introduction of custom elements, as
  custom elements use microtasks to upgrade elements and call
  callbacks. See https://github.com/whatwg/html/issues/8646 for a full
  example reduced from YouTube's JavaScript.
  
  Another potential fix for this issue is to remove the above steps
  from "spin the event loop". However, since we have another issue with
  the use of "spin the event loop", it would be best to just avoid
  both calls to it.

Considering all of the above, removing the call to "the end" is the way
forward for HTML fragment parsing, as all of it should be a no-op.

This is done by simply returning from "the end" if the HTML parser
was created for HTML fragment parsing.

The end.
											
										
										
											2023-04-11 19:19:30 +01:00
+								        return;
-												LibWeb: Implement the speculative HTML parser

When the HTML parser blocks on a synchronous external script, run a
separate tokenizer over the unparsed input and issue speculative fetches
for the resources it finds (script src, link rel=stylesheet|preload, img
src), with <base href> tracking and template/foreign-content skipping.

Also fills in the previously-stubbed "consume a preloaded resource"
algorithm and the document's "map of preloaded resources", so that
<link rel="preload"> followed by a matching consumer deduplicates to
a single fetch.

											
										
										
											2026-04-26 03:21:39 +02:00
+								    // 1. If the active speculative HTML parser is not null, then stop the speculative HTML parser and return.
 								    if (parser && parser->m_active_speculative_html_parser) {
 								        parser->stop_the_speculative_html_parser();
 								        return;
 								    }
-												LibWeb: Split out "The end" from the HTML parsing spec to a function

Also add a spec link and some comments.

											
										
										
											2021-09-26 00:00:00 +02:00
-												LibWeb: Add basic support for dynamic markup insertion

This implements basic support for dynamic markup insertion, adding
 * Document::open()
 * Document::write(Vector<String> const&)
 * Document::writeln(Vector<String> const&)
 * Document::close()

The HTMLParser is modified to make it possible to create a
script-created parser which initially only contains a HTMLTokenizer
without any data. Additionally, the HTMLParser::run method gains an
overload which does not modify the Document and does not run
HTMLParser::the_end() so that we can reenter the parser at a later time.
Furthermore, all FIXMEs that concern the insertion point, which is
defined in the HTMLTokenizer, are implemented. Additionally, the following
member variables of the HTMLParser are now exposed by getter functions:
 * m_tokenizer
 * m_aborted
 * m_script_nesting_level

The HTMLTokenizer is modified so that it contains an insertion
point which keeps track of where the next input from the Document::write
functions will be inserted. The insertion point is implemented as the
character offset into m_decoded_input and a boolean describing if the
insertion point is defined. Functions to update, check and {re}store the
insertion point are also added.
The function HTMLTokenizer::insert_eof is added to tell a script-created
parser that document::close was called and HTMLParser::the_end() should
be called.
Lastly an explicit default constructor is added to HTMLTokenizer to
create an empty HTMLTokenizer into which data can be inserted.

											
										
										
											2022-02-19 15:58:21 +01:00
+								    // 2. Set the insertion point to undefined.
-												LibWeb: Make HTMLParser::the_end() callable from outside

This is a little awkward: The spec requires when loading media documents
or ones that don't have a DOM, that we "act as if the user agent had
stopped parsing document" which means following this algorithm. Only a
few steps require an HTMLParser, but those that do, involve reaching
into its internals. The simplest solution I could think of (other than
duplicating this fairly hefty function) is making it static and taking
a Document and optional HTMLParser as parameters.

											
										
										
											2023-12-19 12:51:34 +00:00
+								    if (parser)
 								        parser->m_tokenizer.undefine_insertion_point();
-												LibWeb: Fire a DOMContentLoaded event when the new parser is finished

With this change, we can finally load and render welcome.html :^)

											
										
										
											2020-05-27 23:32:50 +02:00
-												LibWeb: Split out "The end" from the HTML parsing spec to a function

Also add a spec link and some comments.

											
										
										
											2021-09-26 00:00:00 +02:00
+								    // 3. Update the current document readiness to "interactive".
-												LibWeb: Make HTMLParser::the_end() callable from outside

This is a little awkward: The spec requires when loading media documents
or ones that don't have a DOM, that we "act as if the user agent had
stopped parsing document" which means following this algorithm. Only a
few steps require an HTMLParser, but those that do, involve reaching
into its internals. The simplest solution I could think of (other than
duplicating this fairly hefty function) is making it static and taking
a Document and optional HTMLParser as parameters.

											
										
										
											2023-12-19 12:51:34 +00:00
+								    document->update_readiness(HTML::DocumentReadyState::Interactive);
-												LibWeb: Implement document ready state

											
										
										
											2020-08-31 13:56:16 +01:00
-												LibWeb: Split out "The end" from the HTML parsing spec to a function

Also add a spec link and some comments.

											
										
										
											2021-09-26 00:00:00 +02:00
+								    // 4. Pop all the nodes off the stack of open elements.
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
+								    if (parser)
 								        parser->pop_all_open_elements();
-												LibWeb: Pop entire stack of open elements at the end of parsing

											
										
										
											2021-09-20 17:04:55 +02:00
-												LibWeb: Set readyState to complete for DOMParser documents

Documents created via DOMParser.parseFromString()
are parsed synchronously and do not participate in the
browsing context's loading pipeline.

This patch ensures that if the document has no browsing context
(i.e. was parsed via DOMParser),
its readiness is set to "complete" synchronously.

Fixes WPT:
domparsing/xmldomparser.html

											
										
										
											2025-06-04 09:44:03 +02:00
+								    // AD-HOC: Skip remaining steps when there's no browsing context.
 								    // This happens when parsing HTML via DOMParser or similar mechanisms.
 								    // Note: This diverges from the spec, which expects more steps to follow.
 								    if (!document->browsing_context()) {
 								        // Parsed via DOMParser, no need to wait for load events.
 								        document->update_readiness(HTML::DocumentReadyState::Complete);
 								        return;
 								    }
-												LibWeb: Replace spin_until in HTMLParser::the_end() with state machine

HTMLParser::the_end() had three spin_until calls that blocked the event
loop: step 5 (deferred scripts), step 7 (ASAP scripts), and step 8
(load event delay). This replaces them with an HTMLParserEndState state
machine that progresses asynchronously via callbacks.

The state machine has three phases matching the three spin_until calls:
- WaitingForDeferredScripts: loops executing ready deferred scripts
- WaitingForASAPScripts: waits for ASAP script lists to empty
- WaitingForLoadEventDelay: waits for nothing to delay the load event

Notification triggers re-evaluate the state machine when conditions
change: HTMLScriptElement::mark_as_ready, stylesheet unblocking in
StyleElementBase/HTMLLinkElement, did_stop_being_active_document, and
DocumentLoadEventDelayer decrements. NavigableContainer state changes
(session history readiness, content navigable cleared, lazy load flag)
also trigger re-evaluation of the load event delay check.

Key design decisions and why:

1. Microtask checkpoint in schedule_progress_check(): The old spin_until
   called perform_a_microtask_checkpoint() before checking conditions.
   This is critical because HTMLImageElement::update_the_image_data step
   8 queues a microtask that creates the DocumentLoadEventDelayer.
   Without the checkpoint, check_progress() would see zero delayers and
   complete before images start delaying the load event.

2. deferred_invoke in schedule_progress_check():
   I tried Core::Timer (0ms), queue_global_task, and synchronous calls.
   Timers caused non-deterministic ordering with the HTML event loop's
   task processing timer, leading to image layout tests failing (wrong
   subtest pass/fail patterns). Synchronous calls fired too early during
   image load processing before dimensions were set, causing 0-height
   images in layout tests. queue_global_task had task ordering issues
   with the session history traversal queue. deferred_invoke runs after
   the current callback returns but within the same event loop pump,
   giving the right balance.

3. Navigation load event guard (m_navigation_load_event_guard): During
   cross-document navigation, finalize_a_cross_document_navigation step
   2 calls set_delaying_load_events(false) before the session history
   traversal activates the new document. This creates a transient state
   where the parent's load event delay check sees the about:blank (which
   has ready_for_post_load_tasks=true) as the active document and
   completes prematurely.

											
										
										
											2026-03-28 09:39:51 +01:00
+								    // Steps 5-11 are handled by the HTMLParserEndState state machine.
 								    auto state = HTMLParserEndState::create(document, parser);
 								    document->set_html_parser_end_state(state);
 								    state->schedule_progress_check();
 								}
 								// Interval (milliseconds, going by the name) handed to the single-shot timer that the HTMLParserEndState
 								// constructor creates and starts. When that timer fires while the state machine has not reached
 								// Phase::Completed, the callback logs the phase it is stuck in.
 								static constexpr int THE_END_TIMEOUT_MS = 15000;
-												LibWeb: Replace spin_until in HTMLParser::handle_text with async resume

Spinning a nested event loop to wait for a parser-blocking script blocks
the calling thread, can deadlock, and creates reentrancy hazards. Switch
to an event-driven pause/resume model, mirroring the prior
HTMLParserEndState refactor (df96b69e7a).

Three WPT document.write tests flip from Fail to Pass and are
rebaselined: all write an external script via document.write() followed
by inline content. With spin_until, control did not return to the caller
of document.write() between writing the script and observing its effects
so the test's order assertions saw a different sequence than the spec
mandates.

											
										
										
											2026-04-25 23:59:12 +02:00
+								// Perform a microtask checkpoint matching spin_until's pre-check semantics: pending microtasks (e.g. image load-event
 								// delayer creation from update_the_image_data step 8) must be drained before checking parser progress. The empty-queue
 								// fast path avoids the save/clear/restore of the execution context stack and notify_about_rejected_promises when there
 								// is nothing to drain.
 								static void perform_pre_progress_microtask_checkpoint()
 								{
 								    auto& loop = main_thread_event_loop();
 								    // Nothing queued: skip the execution context stack save/clear/restore entirely.
 								    if (!loop.microtask_queue_empty()) {
 								        auto& js_vm = loop.vm();
 								        // Run the checkpoint with an emptied execution context stack, then put the saved stack back.
 								        js_vm.save_execution_context_stack();
 								        js_vm.clear_execution_context_stack();
 								        loop.perform_a_microtask_checkpoint();
 								        js_vm.restore_execution_context_stack();
 								    }
 								}
-												LibWeb: Replace spin_until in HTMLParser::the_end() with state machine

HTMLParser::the_end() had three spin_until calls that blocked the event
loop: step 5 (deferred scripts), step 7 (ASAP scripts), and step 8
(load event delay). This replaces them with an HTMLParserEndState state
machine that progresses asynchronously via callbacks.

The state machine has three phases matching the three spin_until calls:
- WaitingForDeferredScripts: loops executing ready deferred scripts
- WaitingForASAPScripts: waits for ASAP script lists to empty
- WaitingForLoadEventDelay: waits for nothing to delay the load event

Notification triggers re-evaluate the state machine when conditions
change: HTMLScriptElement::mark_as_ready, stylesheet unblocking in
StyleElementBase/HTMLLinkElement, did_stop_being_active_document, and
DocumentLoadEventDelayer decrements. NavigableContainer state changes
(session history readiness, content navigable cleared, lazy load flag)
also trigger re-evaluation of the load event delay check.

Key design decisions and why:

1. Microtask checkpoint in schedule_progress_check(): The old spin_until
   called perform_a_microtask_checkpoint() before checking conditions.
   This is critical because HTMLImageElement::update_the_image_data step
   8 queues a microtask that creates the DocumentLoadEventDelayer.
   Without the checkpoint, check_progress() would see zero delayers and
   complete before images start delaying the load event.

2. deferred_invoke in schedule_progress_check():
   I tried Core::Timer (0ms), queue_global_task, and synchronous calls.
   Timers caused non-deterministic ordering with the HTML event loop's
   task processing timer, leading to image layout tests failing (wrong
   subtest pass/fail patterns). Synchronous calls fired too early during
   image load processing before dimensions were set, causing 0-height
   images in layout tests. queue_global_task had task ordering issues
   with the session history traversal queue. deferred_invoke runs after
   the current callback returns but within the same event loop pump,
   giving the right balance.

3. Navigation load event guard (m_navigation_load_event_guard): During
   cross-document navigation, finalize_a_cross_document_navigation step
   2 calls set_delaying_load_events(false) before the session history
   traversal activates the new document. This creates a transient state
   where the parent's load event delay check sees the about:blank (which
   has ready_for_post_load_tasks=true) as the active document and
   completes prematurely.

											
										
										
											2026-03-28 09:39:51 +01:00
+								GC::Ref<HTMLParserEndState> HTMLParserEndState::create(GC::Ref<DOM::Document> document, GC::Ptr<HTMLParser> parser)
 								{
 								    // Allocate the end-state object on the heap obtained from the document, forwarding both arguments to the
 								    // constructor unchanged.
 								    auto&& gc_heap = document->heap();
 								    return gc_heap.allocate<HTMLParserEndState>(document, parser);
 								}
-												LibWeb: Implement more of HTMLParser::the_end() and bring closer to spec

											
										
										
											2021-09-26 00:51:02 +02:00
-												LibWeb: Replace spin_until in HTMLParser::the_end() with state machine

HTMLParser::the_end() had three spin_until calls that blocked the event
loop: step 5 (deferred scripts), step 7 (ASAP scripts), and step 8
(load event delay). This replaces them with an HTMLParserEndState state
machine that progresses asynchronously via callbacks.

The state machine has three phases matching the three spin_until calls:
- WaitingForDeferredScripts: loops executing ready deferred scripts
- WaitingForASAPScripts: waits for ASAP script lists to empty
- WaitingForLoadEventDelay: waits for nothing to delay the load event

Notification triggers re-evaluate the state machine when conditions
change: HTMLScriptElement::mark_as_ready, stylesheet unblocking in
StyleElementBase/HTMLLinkElement, did_stop_being_active_document, and
DocumentLoadEventDelayer decrements. NavigableContainer state changes
(session history readiness, content navigable cleared, lazy load flag)
also trigger re-evaluation of the load event delay check.

Key design decisions and why:

1. Microtask checkpoint in schedule_progress_check(): The old spin_until
   called perform_a_microtask_checkpoint() before checking conditions.
   This is critical because HTMLImageElement::update_the_image_data step
   8 queues a microtask that creates the DocumentLoadEventDelayer.
   Without the checkpoint, check_progress() would see zero delayers and
   complete before images start delaying the load event.

2. deferred_invoke in schedule_progress_check():
   I tried Core::Timer (0ms), queue_global_task, and synchronous calls.
   Timers caused non-deterministic ordering with the HTML event loop's
   task processing timer, leading to image layout tests failing (wrong
   subtest pass/fail patterns). Synchronous calls fired too early during
   image load processing before dimensions were set, causing 0-height
   images in layout tests. queue_global_task had task ordering issues
   with the session history traversal queue. deferred_invoke runs after
   the current callback returns but within the same event loop pump,
   giving the right balance.

3. Navigation load event guard (m_navigation_load_event_guard): During
   cross-document navigation, finalize_a_cross_document_navigation step
   2 calls set_delaying_load_events(false) before the session history
   traversal activates the new document. This creates a transient state
   where the parent's load event delay check sees the about:blank (which
   has ready_for_post_load_tasks=true) as the active document and
   completes prematurely.

											
										
										
											2026-03-28 09:39:51 +01:00
+								HTMLParserEndState::HTMLParserEndState(GC::Ref<DOM::Document> document, GC::Ptr<HTMLParser> parser)
 								    // Stores the document and the parser (a GC::Ptr, so it may be null) and creates the timeout timer.
 								    : m_document(document)
 								    , m_parser(parser)
 								    // Single-shot timer with an interval of THE_END_TIMEOUT_MS. Its callback only logs the current phase when
 								    // the state machine has not reached Phase::Completed; it does not modify m_phase.
 								    , m_timeout(Platform::Timer::create_single_shot(heap(), THE_END_TIMEOUT_MS, GC::create_function(heap(), [this] {
 								        if (m_phase != Phase::Completed)
 								            dbgln("HTMLParserEndState: timed out in phase {}", to_underlying(m_phase));
 								    })))
 								{
 								    // Start the timer immediately, as part of construction.
 								    m_timeout->start();
 								}
-												LibWeb: Implement more of HTMLParser::the_end() and bring closer to spec

											
										
										
											2021-09-26 00:51:02 +02:00
-												LibWeb: Replace spin_until in HTMLParser::the_end() with state machine

HTMLParser::the_end() had three spin_until calls that blocked the event
loop: step 5 (deferred scripts), step 7 (ASAP scripts), and step 8
(load event delay). This replaces them with an HTMLParserEndState state
machine that progresses asynchronously via callbacks.

The state machine has three phases matching the three spin_until calls:
- WaitingForDeferredScripts: loops executing ready deferred scripts
- WaitingForASAPScripts: waits for ASAP script lists to empty
- WaitingForLoadEventDelay: waits for nothing to delay the load event

Notification triggers re-evaluate the state machine when conditions
change: HTMLScriptElement::mark_as_ready, stylesheet unblocking in
StyleElementBase/HTMLLinkElement, did_stop_being_active_document, and
DocumentLoadEventDelayer decrements. NavigableContainer state changes
(session history readiness, content navigable cleared, lazy load flag)
also trigger re-evaluation of the load event delay check.

Key design decisions and why:

1. Microtask checkpoint in schedule_progress_check(): The old spin_until
   called perform_a_microtask_checkpoint() before checking conditions.
   This is critical because HTMLImageElement::update_the_image_data step
   8 queues a microtask that creates the DocumentLoadEventDelayer.
   Without the checkpoint, check_progress() would see zero delayers and
   complete before images start delaying the load event.

2. deferred_invoke in schedule_progress_check():
   I tried Core::Timer (0ms), queue_global_task, and synchronous calls.
   Timers caused non-deterministic ordering with the HTML event loop's
   task processing timer, leading to image layout tests failing (wrong
   subtest pass/fail patterns). Synchronous calls fired too early during
   image load processing before dimensions were set, causing 0-height
   images in layout tests. queue_global_task had task ordering issues
   with the session history traversal queue. deferred_invoke runs after
   the current callback returns but within the same event loop pump,
   giving the right balance.

3. Navigation load event guard (m_navigation_load_event_guard): During
   cross-document navigation, finalize_a_cross_document_navigation step
   2 calls set_delaying_load_events(false) before the session history
   traversal activates the new document. This creates a transient state
   where the parent's load event delay check sees the about:blank (which
   has ready_for_post_load_tasks=true) as the active document and
   completes prematurely.

											
										
										
											2026-03-28 09:39:51 +01:00
 								// Lets `visitor` see the objects this state object holds on to: after delegating to the base
 								// class it reports m_document, m_parser and m_timeout.
 								// NOTE(review): presumably every heap-allocated member of this class must be reported here;
 								// confirm against the class declaration when adding members.
+								void HTMLParserEndState::visit_edges(Cell::Visitor& visitor)
 								{
 								    Base::visit_edges(visitor);
 								    visitor.visit(m_document);
 								    visitor.visit(m_parser);
 								    visitor.visit(m_timeout);
 								}
 								void HTMLParserEndState::schedule_progress_check()
 								{
 								    if (m_phase == Phase::Completed)
 								        return;
 								    if (m_check_pending)
 								        return;
 								    m_check_pending = true;
 								    Platform::EventLoopPlugin::the().deferred_invoke(GC::create_function(heap(), [this] {
-												LibWeb: Replace spin_until in HTMLParser::handle_text with async resume

Spinning a nested event loop to wait for a parser-blocking script blocks
the calling thread, can deadlock, and creates reentrancy hazards. Switch
to an event-driven pause/resume model, mirroring the prior
HTMLParserEndState refactor (df96b69e7a).

Three WPT document.write tests flip from Fail to Pass and are
rebaselined: all write an external script via document.write() followed
by inline content. With spin_until, control did not return to the caller
of document.write() between writing the script and observing its effects
so the test's order assertions saw a different sequence than the spec
mandates.

											
										
										
											2026-04-25 23:59:12 +02:00
+								        perform_pre_progress_microtask_checkpoint();
-												LibWeb: Replace spin_until in HTMLParser::the_end() with state machine

HTMLParser::the_end() had three spin_until calls that blocked the event
loop: step 5 (deferred scripts), step 7 (ASAP scripts), and step 8
(load event delay). This replaces them with an HTMLParserEndState state
machine that progresses asynchronously via callbacks.

The state machine has three phases matching the three spin_until calls:
- WaitingForDeferredScripts: loops executing ready deferred scripts
- WaitingForASAPScripts: waits for ASAP script lists to empty
- WaitingForLoadEventDelay: waits for nothing to delay the load event

Notification triggers re-evaluate the state machine when conditions
change: HTMLScriptElement::mark_as_ready, stylesheet unblocking in
StyleElementBase/HTMLLinkElement, did_stop_being_active_document, and
DocumentLoadEventDelayer decrements. NavigableContainer state changes
(session history readiness, content navigable cleared, lazy load flag)
also trigger re-evaluation of the load event delay check.

Key design decisions and why:

1. Microtask checkpoint in schedule_progress_check(): The old spin_until
   called perform_a_microtask_checkpoint() before checking conditions.
   This is critical because HTMLImageElement::update_the_image_data step
   8 queues a microtask that creates the DocumentLoadEventDelayer.
   Without the checkpoint, check_progress() would see zero delayers and
   complete before images start delaying the load event.

2. deferred_invoke in schedule_progress_check():
   I tried Core::Timer (0ms), queue_global_task, and synchronous calls.
   Timers caused non-deterministic ordering with the HTML event loop's
   task processing timer, leading to image layout tests failing (wrong
   subtest pass/fail patterns). Synchronous calls fired too early during
   image load processing before dimensions were set, causing 0-height
   images in layout tests. queue_global_task had task ordering issues
   with the session history traversal queue. deferred_invoke runs after
   the current callback returns but within the same event loop pump,
   giving the right balance.

3. Navigation load event guard (m_navigation_load_event_guard): During
   cross-document navigation, finalize_a_cross_document_navigation step
   2 calls set_delaying_load_events(false) before the session history
   traversal activates the new document. This creates a transient state
   where the parent's load event delay check sees the about:blank (which
   has ready_for_post_load_tasks=true) as the active document and
   completes prematurely.

											
										
										
											2026-03-28 09:39:51 +01:00
+								        check_progress();
 								        m_check_pending = false;
 								    }));
 								}
 								void HTMLParserEndState::check_progress()
 								{
 								    // AD-HOC: Bail out if the document is no longer fully active (e.g. navigated away from).
 								    if (!m_document->is_fully_active()) {
 								        complete();
 								        return;
-												LibWeb: Handle two kinds of deferred script executions

This patch adds two script lists to Document:

- Scripts to execute when parsing has finished
- Scripts to execute as soon as possible

Since we don't actually load scripts asynchronously yet (we just do a
synchronous load when parsing the <script> element for simplicity),
these are already loaded by the time we get to "The end" of parsing.

											
										
										
											2020-05-30 12:26:15 +02:00
+								    }
-												LibWeb: Replace spin_until in HTMLParser::the_end() with state machine

HTMLParser::the_end() had three spin_until calls that blocked the event
loop: step 5 (deferred scripts), step 7 (ASAP scripts), and step 8
(load event delay). This replaces them with an HTMLParserEndState state
machine that progresses asynchronously via callbacks.

The state machine has three phases matching the three spin_until calls:
- WaitingForDeferredScripts: loops executing ready deferred scripts
- WaitingForASAPScripts: waits for ASAP script lists to empty
- WaitingForLoadEventDelay: waits for nothing to delay the load event

Notification triggers re-evaluate the state machine when conditions
change: HTMLScriptElement::mark_as_ready, stylesheet unblocking in
StyleElementBase/HTMLLinkElement, did_stop_being_active_document, and
DocumentLoadEventDelayer decrements. NavigableContainer state changes
(session history readiness, content navigable cleared, lazy load flag)
also trigger re-evaluation of the load event delay check.

Key design decisions and why:

1. Microtask checkpoint in schedule_progress_check(): The old spin_until
   called perform_a_microtask_checkpoint() before checking conditions.
   This is critical because HTMLImageElement::update_the_image_data step
   8 queues a microtask that creates the DocumentLoadEventDelayer.
   Without the checkpoint, check_progress() would see zero delayers and
   complete before images start delaying the load event.

2. deferred_invoke in schedule_progress_check():
   I tried Core::Timer (0ms), queue_global_task, and synchronous calls.
   Timers caused non-deterministic ordering with the HTML event loop's
   task processing timer, leading to image layout tests failing (wrong
   subtest pass/fail patterns). Synchronous calls fired too early during
   image load processing before dimensions were set, causing 0-height
   images in layout tests. queue_global_task had task ordering issues
   with the session history traversal queue. deferred_invoke runs after
   the current callback returns but within the same event loop pump,
   giving the right balance.

3. Navigation load event guard (m_navigation_load_event_guard): During
   cross-document navigation, finalize_a_cross_document_navigation step
   2 calls set_delaying_load_events(false) before the session history
   traversal activates the new document. This creates a transient state
   where the parent's load event delay check sees the about:blank (which
   has ready_for_post_load_tasks=true) as the active document and
   completes prematurely.

											
										
										
											2026-03-28 09:39:51 +01:00
+								    switch (m_phase) {
 								    case Phase::WaitingForDeferredScripts:
 								        // 5. While the list of scripts that will execute when the document has finished parsing is not empty:
 								        while (!m_document->scripts_to_execute_when_parsing_has_finished().is_empty()) {
 								            auto& first_script = *m_document->scripts_to_execute_when_parsing_has_finished().first();
 								            // 1. Spin the event loop until the first script in the list of scripts that will execute when the document has finished parsing
 								            //    has its "ready to be parser-executed" flag set and the parser's Document has no style sheet that is blocking scripts.
 								            if (!first_script.is_ready_to_be_parser_executed() || m_document->has_a_style_sheet_that_is_blocking_scripts())
 								                return;
 								            // 2. Execute the first script in the list of scripts that will execute when the document has finished parsing.
 								            first_script.execute_script();
 								            // 3. Remove the first script element from the list of scripts that will execute when the document has finished parsing (i.e. shift out the first entry in the list).
 								            (void)m_document->scripts_to_execute_when_parsing_has_finished().take_first();
 								        }
 								        advance_to_asap_scripts_phase();
 								        [[fallthrough]];
 								    case Phase::WaitingForASAPScripts:
 								        // 7. Spin the event loop until the set of scripts that will execute as soon as possible and the list of scripts
 								        //    that will execute in order as soon as possible are empty.
 								        if (!m_document->scripts_to_execute_as_soon_as_possible().is_empty()
 								            || !m_document->scripts_to_execute_in_order_as_soon_as_possible().is_empty())
 								            return;
 								        m_phase = Phase::WaitingForLoadEventDelay;
 								        [[fallthrough]];
 								    case Phase::WaitingForLoadEventDelay:
 								        // 8. Spin the event loop until there is nothing that delays the load event in the Document.
 								        if (m_document->anything_is_delaying_the_load_event())
 								            return;
 								        m_phase = Phase::Completed;
 								        [[fallthrough]];
 								    case Phase::Completed:
 								        complete();
 								        return;
 								    }
 								}
 								void HTMLParserEndState::advance_to_asap_scripts_phase()
 								{
-												LibWeb: Respect scroll position set by script during page load

When setting scroll position during page load we need to consider
whether we actually have a fragment to scroll to. A script may already
have run at that point and may already have set a scroll position.

If there is an actual fragment to scroll to, it is fine to scroll to
that fragment, since it should take precedence. If we don't have a
fragment however, we should not unnecessarily overwrite the scroll
position set by the script back to (0, 0).

Since this problem is caused by a spec bug, I have tested the behavior
in the three major browsers engines. Unfortunately they do not agree
fully with each other. If there is no fragment at all (e.g. `foo.html`),
all browsers will respect the scroll position set by the script. If
there is a fragment (e.g. `foo.html#bar`), all browsers will set the
scroll position to the fragment element and ignore the one set by
script. However, when the fragment is empty (e.g. `foo.html#`), then
Blink and WebKit will set scroll position to the fragment, while Gecko
will set scroll position from script. Since all of this is ad-hoc
behavior anyway, I simply implemented the Blink/WebKit behavior because
of the majority vote for now.

This fixes a regression introduced in 51102254b5.

											
										
										
											2025-03-07 22:04:36 +01:00
+								    // AD-HOC: We need to scroll to the fragment on page load somewhere.
 								    // But a script that ran in step 5 above may have scrolled the page already,
 								    // so only do this if there is an actual fragment to avoid resetting the scroll position unexpectedly.
 								    // Spec bug: https://github.com/whatwg/html/issues/10914
-												LibWeb: Replace spin_until in HTMLParser::the_end() with state machine

HTMLParser::the_end() had three spin_until calls that blocked the event
loop: step 5 (deferred scripts), step 7 (ASAP scripts), and step 8
(load event delay). This replaces them with an HTMLParserEndState state
machine that progresses asynchronously via callbacks.

The state machine has three phases matching the three spin_until calls:
- WaitingForDeferredScripts: loops executing ready deferred scripts
- WaitingForASAPScripts: waits for ASAP script lists to empty
- WaitingForLoadEventDelay: waits for nothing to delay the load event

Notification triggers re-evaluate the state machine when conditions
change: HTMLScriptElement::mark_as_ready, stylesheet unblocking in
StyleElementBase/HTMLLinkElement, did_stop_being_active_document, and
DocumentLoadEventDelayer decrements. NavigableContainer state changes
(session history readiness, content navigable cleared, lazy load flag)
also trigger re-evaluation of the load event delay check.

Key design decisions and why:

1. Microtask checkpoint in schedule_progress_check(): The old spin_until
   called perform_a_microtask_checkpoint() before checking conditions.
   This is critical because HTMLImageElement::update_the_image_data step
   8 queues a microtask that creates the DocumentLoadEventDelayer.
   Without the checkpoint, check_progress() would see zero delayers and
   complete before images start delaying the load event.

2. deferred_invoke in schedule_progress_check():
   I tried Core::Timer (0ms), queue_global_task, and synchronous calls.
   Timers caused non-deterministic ordering with the HTML event loop's
   task processing timer, leading to image layout tests failing (wrong
   subtest pass/fail patterns). Synchronous calls fired too early during
   image load processing before dimensions were set, causing 0-height
   images in layout tests. queue_global_task had task ordering issues
   with the session history traversal queue. deferred_invoke runs after
   the current callback returns but within the same event loop pump,
   giving the right balance.

3. Navigation load event guard (m_navigation_load_event_guard): During
   cross-document navigation, finalize_a_cross_document_navigation step
   2 calls set_delaying_load_events(false) before the session history
   traversal activates the new document. This creates a transient state
   where the parent's load event delay check sees the about:blank (which
   has ready_for_post_load_tasks=true) as the active document and
   completes prematurely.

											
										
										
											2026-03-28 09:39:51 +01:00
+								    auto indicated_part = m_document->determine_the_indicated_part();
-												LibWeb: Respect scroll position set by script during page load

When setting scroll position during page load we need to consider
whether we actually have a fragment to scroll to. A script may already
have run at that point and may already have set a scroll position.

If there is an actual fragment to scroll to, it is fine to scroll to
that fragment, since it should take precedence. If we don't have a
fragment however, we should not unnecessarily overwrite the scroll
position set by the script back to (0, 0).

Since this problem is caused by a spec bug, I have tested the behavior
in the three major browsers engines. Unfortunately they do not agree
fully with each other. If there is no fragment at all (e.g. `foo.html`),
all browsers will respect the scroll position set by the script. If
there is a fragment (e.g. `foo.html#bar`), all browsers will set the
scroll position to the fragment element and ignore the one set by
script. However, when the fragment is empty (e.g. `foo.html#`), then
Blink and WebKit will set scroll position to the fragment, while Gecko
will set scroll position from script. Since all of this is ad-hoc
behavior anyway, I simply implemented the Blink/WebKit behavior because
of the majority vote for now.

This fixes a regression introduced in 51102254b5.

											
										
										
											2025-03-07 22:04:36 +01:00
+								    if (indicated_part.has<DOM::Element*>() && indicated_part.get<DOM::Element*>() != nullptr) {
-												LibWeb: Replace spin_until in HTMLParser::the_end() with state machine

HTMLParser::the_end() had three spin_until calls that blocked the event
loop: step 5 (deferred scripts), step 7 (ASAP scripts), and step 8
(load event delay). This replaces them with an HTMLParserEndState state
machine that progresses asynchronously via callbacks.

The state machine has three phases matching the three spin_until calls:
- WaitingForDeferredScripts: loops executing ready deferred scripts
- WaitingForASAPScripts: waits for ASAP script lists to empty
- WaitingForLoadEventDelay: waits for nothing to delay the load event

Notification triggers re-evaluate the state machine when conditions
change: HTMLScriptElement::mark_as_ready, stylesheet unblocking in
StyleElementBase/HTMLLinkElement, did_stop_being_active_document, and
DocumentLoadEventDelayer decrements. NavigableContainer state changes
(session history readiness, content navigable cleared, lazy load flag)
also trigger re-evaluation of the load event delay check.

Key design decisions and why:

1. Microtask checkpoint in schedule_progress_check(): The old spin_until
   called perform_a_microtask_checkpoint() before checking conditions.
   This is critical because HTMLImageElement::update_the_image_data step
   8 queues a microtask that creates the DocumentLoadEventDelayer.
   Without the checkpoint, check_progress() would see zero delayers and
   complete before images start delaying the load event.

2. deferred_invoke in schedule_progress_check():
   I tried Core::Timer (0ms), queue_global_task, and synchronous calls.
   Timers caused non-deterministic ordering with the HTML event loop's
   task processing timer, leading to image layout tests failing (wrong
   subtest pass/fail patterns). Synchronous calls fired too early during
   image load processing before dimensions were set, causing 0-height
   images in layout tests. queue_global_task had task ordering issues
   with the session history traversal queue. deferred_invoke runs after
   the current callback returns but within the same event loop pump,
   giving the right balance.

3. Navigation load event guard (m_navigation_load_event_guard): During
   cross-document navigation, finalize_a_cross_document_navigation step
   2 calls set_delaying_load_events(false) before the session history
   traversal activates the new document. This creates a transient state
   where the parent's load event delay check sees the about:blank (which
   has ready_for_post_load_tasks=true) as the active document and
   completes prematurely.

											
										
										
											2026-03-28 09:39:51 +01:00
+								        m_document->scroll_to_the_fragment();
-												LibWeb: Respect scroll position set by script during page load

When setting scroll position during page load we need to consider
whether we actually have a fragment to scroll to. A script may already
have run at that point and may already have set a scroll position.

If there is an actual fragment to scroll to, it is fine to scroll to
that fragment, since it should take precedence. If we don't have a
fragment however, we should not unnecessarily overwrite the scroll
position set by the script back to (0, 0).

Since this problem is caused by a spec bug, I have tested the behavior
in the three major browsers engines. Unfortunately they do not agree
fully with each other. If there is no fragment at all (e.g. `foo.html`),
all browsers will respect the scroll position set by the script. If
there is a fragment (e.g. `foo.html#bar`), all browsers will set the
scroll position to the fragment element and ignore the one set by
script. However, when the fragment is empty (e.g. `foo.html#`), then
Blink and WebKit will set scroll position to the fragment, while Gecko
will set scroll position from script. Since all of this is ad-hoc
behavior anyway, I simply implemented the Blink/WebKit behavior because
of the majority vote for now.

This fixes a regression introduced in 51102254b5.

											
										
										
											2025-03-07 22:04:36 +01:00
+								    }
-												LibWeb/HTML: Scroll to the fragment before loading the document

Otherwise nowhere ends up scrolling to the fragment specified by the
fragment in document's URL. This fixes ladybird scrolling to the
correct location in the document when navigating to a link that
has a fragment, e.g:

https://html.spec.whatwg.org/multipage/browsing-the-web.html#try-to-scroll-to-the-fragment

As well as use of the :target selector.

											
										
										
											2025-01-15 17:15:25 +13:00
-												LibWeb: Implement more of HTMLParser::the_end() and bring closer to spec

											
										
										
											2021-09-26 00:51:02 +02:00
+								    // 6. Queue a global task on the DOM manipulation task source given the Document's relevant global object to run the following substeps:
-												LibWeb: Replace spin_until in HTMLParser::the_end() with state machine

HTMLParser::the_end() had three spin_until calls that blocked the event
loop: step 5 (deferred scripts), step 7 (ASAP scripts), and step 8
(load event delay). This replaces them with an HTMLParserEndState state
machine that progresses asynchronously via callbacks.

The state machine has three phases matching the three spin_until calls:
- WaitingForDeferredScripts: loops executing ready deferred scripts
- WaitingForASAPScripts: waits for ASAP script lists to empty
- WaitingForLoadEventDelay: waits for nothing to delay the load event

Notification triggers re-evaluate the state machine when conditions
change: HTMLScriptElement::mark_as_ready, stylesheet unblocking in
StyleElementBase/HTMLLinkElement, did_stop_being_active_document, and
DocumentLoadEventDelayer decrements. NavigableContainer state changes
(session history readiness, content navigable cleared, lazy load flag)
also trigger re-evaluation of the load event delay check.

Key design decisions and why:

1. Microtask checkpoint in schedule_progress_check(): The old spin_until
   called perform_a_microtask_checkpoint() before checking conditions.
   This is critical because HTMLImageElement::update_the_image_data step
   8 queues a microtask that creates the DocumentLoadEventDelayer.
   Without the checkpoint, check_progress() would see zero delayers and
   complete before images start delaying the load event.

2. deferred_invoke in schedule_progress_check():
   I tried Core::Timer (0ms), queue_global_task, and synchronous calls.
   Timers caused non-deterministic ordering with the HTML event loop's
   task processing timer, leading to image layout tests failing (wrong
   subtest pass/fail patterns). Synchronous calls fired too early during
   image load processing before dimensions were set, causing 0-height
   images in layout tests. queue_global_task had task ordering issues
   with the session history traversal queue. deferred_invoke runs after
   the current callback returns but within the same event loop pump,
   giving the right balance.

3. Navigation load event guard (m_navigation_load_event_guard): During
   cross-document navigation, finalize_a_cross_document_navigation step
   2 calls set_delaying_load_events(false) before the session history
   traversal activates the new document. This creates a transient state
   where the parent's load event delay check sees the about:blank (which
   has ready_for_post_load_tasks=true) as the active document and
   completes prematurely.

											
										
										
											2026-03-28 09:39:51 +01:00
+								    queue_global_task(HTML::Task::Source::DOMManipulation, *m_document, GC::create_function(m_document->heap(), [document = m_document] {
-												LibWeb: Save begin/end timestamps for load and DOMContentLoaded events

											
										
										
											2022-09-21 00:43:38 +02:00
+								        // 1. Set the Document's load timing info's DOM content loaded event start time to the current high resolution time given the Document's relevant global object.
-												LibWeb: Use "current high resolution time" AO where relevant

And updating some spec comments to latest spec where it is not relevant.

											
										
										
											2024-04-11 22:42:35 +02:00
+								        document->load_timing_info().dom_content_loaded_event_start_time = HighResolutionTime::current_high_resolution_time(relevant_global_object(*document));
-												LibWeb: Implement more of HTMLParser::the_end() and bring closer to spec

											
										
										
											2021-09-26 00:51:02 +02:00
 								        // 2. Fire an event named DOMContentLoaded at the Document object, with its bubbles attribute initialized to true.
-												LibJS: Make Heap::allocate<T>() infallible

Stop worrying about tiny OOMs. Work towards #20449.

While going through these, I also changed the function signature in many
places where returning ThrowCompletionOr<T> is no longer necessary.

											
										
										
											2023-08-13 13:05:26 +02:00
+								        auto content_loaded_event = DOM::Event::create(document->realm(), HTML::EventNames::DOMContentLoaded);
-												LibWeb: Implement more of HTMLParser::the_end() and bring closer to spec

											
										
										
											2021-09-26 00:51:02 +02:00
+								        content_loaded_event->set_bubbles(true);
-												LibWeb: Make factory methods of DOM::Event fallible

Because of interdependencies between DOM::Event and UIEvents::MouseEvent
to template function fire_an_event() in WebDriverConnection.cpp, the
commit: 'LibWeb: Make factory methods of UIEvents::MouseEvent fallible'
have been squashed into this commit.

											
										
										
											2023-02-14 22:43:17 +01:00
+								        document->dispatch_event(content_loaded_event);
-												LibWeb: Implement more of HTMLParser::the_end() and bring closer to spec

											
										
										
											2021-09-26 00:51:02 +02:00
-												LibWeb: Save begin/end timestamps for load and DOMContentLoaded events

											
										
										
											2022-09-21 00:43:38 +02:00
+								        // 3. Set the Document's load timing info's DOM content loaded event end time to the current high resolution time given the Document's relevant global object.
-												LibWeb: Use "current high resolution time" AO where relevant

And updating some spec comments to latest spec where it is not relevant.

											
										
										
											2024-04-11 22:42:35 +02:00
+								        document->load_timing_info().dom_content_loaded_event_end_time = HighResolutionTime::current_high_resolution_time(relevant_global_object(*document));
-												LibWeb: Implement more of HTMLParser::the_end() and bring closer to spec

											
										
										
											2021-09-26 00:51:02 +02:00
 								        // FIXME: 4. Enable the client message queue of the ServiceWorkerContainer object whose associated service worker client is the Document object's relevant settings object.
 								        // FIXME: 5. Invoke WebDriver BiDi DOM content loaded with the Document's browsing context, and a new WebDriver BiDi navigation status whose id is the Document object's navigation id, status is "pending", and url is the Document object's URL.
-												LibWeb: Let queue_global_task() take a JS::HeapFunction

Changes the signature of queue_global_task() from AK:Function to
JS::HeapFunction to be more clear to the user of the function that this
is what it uses internally.

											
										
										
											2024-04-16 22:04:01 +02:00
+								    }));
-												LibWeb: Dispatch "load" on document and window

These happen right after "DOMContentLoaded" for now, which is incorrect
since they should really wait until subresources have loaded.
However, this makes a bunch of things work already so let's do it.

											
										
										
											2020-10-18 13:45:28 +02:00
-												LibWeb: Replace spin_until in HTMLParser::the_end() with state machine

HTMLParser::the_end() had three spin_until calls that blocked the event
loop: step 5 (deferred scripts), step 7 (ASAP scripts), and step 8
(load event delay). This replaces them with an HTMLParserEndState state
machine that progresses asynchronously via callbacks.

The state machine has three phases matching the three spin_until calls:
- WaitingForDeferredScripts: loops executing ready deferred scripts
- WaitingForASAPScripts: waits for ASAP script lists to empty
- WaitingForLoadEventDelay: waits for nothing to delay the load event

Notification triggers re-evaluate the state machine when conditions
change: HTMLScriptElement::mark_as_ready, stylesheet unblocking in
StyleElementBase/HTMLLinkElement, did_stop_being_active_document, and
DocumentLoadEventDelayer decrements. NavigableContainer state changes
(session history readiness, content navigable cleared, lazy load flag)
also trigger re-evaluation of the load event delay check.

Key design decisions and why:

1. Microtask checkpoint in schedule_progress_check(): The old spin_until
   called perform_a_microtask_checkpoint() before checking conditions.
   This is critical because HTMLImageElement::update_the_image_data step
   8 queues a microtask that creates the DocumentLoadEventDelayer.
   Without the checkpoint, check_progress() would see zero delayers and
   complete before images start delaying the load event.

2. deferred_invoke in schedule_progress_check():
   I tried Core::Timer (0ms), queue_global_task, and synchronous calls.
   Timers caused non-deterministic ordering with the HTML event loop's
   task processing timer, leading to image layout tests failing (wrong
   subtest pass/fail patterns). Synchronous calls fired too early during
   image load processing before dimensions were set, causing 0-height
   images in layout tests. queue_global_task had task ordering issues
   with the session history traversal queue. deferred_invoke runs after
   the current callback returns but within the same event loop pump,
   giving the right balance.

3. Navigation load event guard (m_navigation_load_event_guard): During
   cross-document navigation, finalize_a_cross_document_navigation step
   2 calls set_delaying_load_events(false) before the session history
   traversal activates the new document. This creates a transient state
   where the parent's load event delay check sees the about:blank (which
   has ready_for_post_load_tasks=true) as the active document and
   completes prematurely.

											
										
										
											2026-03-28 09:39:51 +01:00
+								    m_phase = Phase::WaitingForASAPScripts;
 								}
-												LibWeb: Implement document ready state

											
										
										
											2020-08-31 13:56:16 +01:00
-												LibWeb: Replace spin_until in HTMLParser::the_end() with state machine

HTMLParser::the_end() had three spin_until calls that blocked the event
loop: step 5 (deferred scripts), step 7 (ASAP scripts), and step 8
(load event delay). This replaces them with an HTMLParserEndState state
machine that progresses asynchronously via callbacks.

The state machine has three phases matching the three spin_until calls:
- WaitingForDeferredScripts: loops executing ready deferred scripts
- WaitingForASAPScripts: waits for ASAP script lists to empty
- WaitingForLoadEventDelay: waits for nothing to delay the load event

Notification triggers re-evaluate the state machine when conditions
change: HTMLScriptElement::mark_as_ready, stylesheet unblocking in
StyleElementBase/HTMLLinkElement, did_stop_being_active_document, and
DocumentLoadEventDelayer decrements. NavigableContainer state changes
(session history readiness, content navigable cleared, lazy load flag)
also trigger re-evaluation of the load event delay check.

Key design decisions and why:

1. Microtask checkpoint in schedule_progress_check(): The old spin_until
   called perform_a_microtask_checkpoint() before checking conditions.
   This is critical because HTMLImageElement::update_the_image_data step
   8 queues a microtask that creates the DocumentLoadEventDelayer.
   Without the checkpoint, check_progress() would see zero delayers and
   complete before images start delaying the load event.

2. deferred_invoke in schedule_progress_check():
   I tried Core::Timer (0ms), queue_global_task, and synchronous calls.
   Timers caused non-deterministic ordering with the HTML event loop's
   task processing timer, leading to image layout tests failing (wrong
   subtest pass/fail patterns). Synchronous calls fired too early during
   image load processing before dimensions were set, causing 0-height
   images in layout tests. queue_global_task had task ordering issues
   with the session history traversal queue. deferred_invoke runs after
   the current callback returns but within the same event loop pump,
   giving the right balance.

3. Navigation load event guard (m_navigation_load_event_guard): During
   cross-document navigation, finalize_a_cross_document_navigation step
   2 calls set_delaying_load_events(false) before the session history
   traversal activates the new document. This creates a transient state
   where the parent's load event delay check sees the about:blank (which
   has ready_for_post_load_tasks=true) as the active document and
   completes prematurely.

											
										
										
											2026-03-28 09:39:51 +01:00
+								void HTMLParserEndState::complete()
 								{
 								    m_phase = Phase::Completed;
 								    m_timeout->stop();
 								    m_document->set_html_parser_end_state(nullptr);
-												LibWeb: Implement more of HTMLParser::the_end() and bring closer to spec

											
										
										
											2021-09-26 00:51:02 +02:00
 								    // 9. Queue a global task on the DOM manipulation task source given the Document's relevant global object to run the following steps:
-												LibWeb: Replace spin_until in HTMLParser::the_end() with state machine

HTMLParser::the_end() had three spin_until calls that blocked the event
loop: step 5 (deferred scripts), step 7 (ASAP scripts), and step 8
(load event delay). This replaces them with an HTMLParserEndState state
machine that progresses asynchronously via callbacks.

The state machine has three phases matching the three spin_until calls:
- WaitingForDeferredScripts: loops executing ready deferred scripts
- WaitingForASAPScripts: waits for ASAP script lists to empty
- WaitingForLoadEventDelay: waits for nothing to delay the load event

Notification triggers re-evaluate the state machine when conditions
change: HTMLScriptElement::mark_as_ready, stylesheet unblocking in
StyleElementBase/HTMLLinkElement, did_stop_being_active_document, and
DocumentLoadEventDelayer decrements. NavigableContainer state changes
(session history readiness, content navigable cleared, lazy load flag)
also trigger re-evaluation of the load event delay check.

Key design decisions and why:

1. Microtask checkpoint in schedule_progress_check(): The old spin_until
   called perform_a_microtask_checkpoint() before checking conditions.
   This is critical because HTMLImageElement::update_the_image_data step
   8 queues a microtask that creates the DocumentLoadEventDelayer.
   Without the checkpoint, check_progress() would see zero delayers and
   complete before images start delaying the load event.

2. deferred_invoke in schedule_progress_check():
   I tried Core::Timer (0ms), queue_global_task, and synchronous calls.
   Timers caused non-deterministic ordering with the HTML event loop's
   task processing timer, leading to image layout tests failing (wrong
   subtest pass/fail patterns). Synchronous calls fired too early during
   image load processing before dimensions were set, causing 0-height
   images in layout tests. queue_global_task had task ordering issues
   with the session history traversal queue. deferred_invoke runs after
   the current callback returns but within the same event loop pump,
   giving the right balance.

3. Navigation load event guard (m_navigation_load_event_guard): During
   cross-document navigation, finalize_a_cross_document_navigation step
   2 calls set_delaying_load_events(false) before the session history
   traversal activates the new document. This creates a transient state
   where the parent's load event delay check sees the about:blank (which
   has ready_for_post_load_tasks=true) as the active document and
   completes prematurely.

											
										
										
											2026-03-28 09:39:51 +01:00
+								    queue_global_task(HTML::Task::Source::DOMManipulation, *m_document, GC::create_function(m_document->heap(), [document = m_document, parser = m_parser] {
-												LibWeb: Implement more of HTMLParser::the_end() and bring closer to spec

											
										
										
											2021-09-26 00:51:02 +02:00
+								        // 1. Update the current document readiness to "complete".
-												LibWeb: Implement "update the current document readiness" from spec

The only difference from what we were already doing is that setting the
same ready state twice no longer fires a "readystatechange" event.
I don't think that could happen in practice though.

											
										
										
											2021-09-26 12:22:16 +02:00
+								        document->update_readiness(HTML::DocumentReadyState::Complete);
-												LibWeb: Implement more of HTMLParser::the_end() and bring closer to spec

											
										
										
											2021-09-26 00:51:02 +02:00
-												LibWeb: Wait until ReadyState is complete before detaching HTML parser

Previously, the DOM complete time was never being set, as the HTML
parser was detached before `DocumentReadyState` was set to complete.

											
										
										
											2025-01-06 11:09:47 +00:00
+								        // AD-HOC: We need to wait until the document ready state is complete before detaching the parser, otherwise the DOM complete time will not be set correctly.
 								        if (parser)
-												LibWeb: Replace spin_until in HTMLParser::the_end() with state machine

HTMLParser::the_end() had three spin_until calls that blocked the event
loop: step 5 (deferred scripts), step 7 (ASAP scripts), and step 8
(load event delay). This replaces them with an HTMLParserEndState state
machine that progresses asynchronously via callbacks.

The state machine has three phases matching the three spin_until calls:
- WaitingForDeferredScripts: loops executing ready deferred scripts
- WaitingForASAPScripts: waits for ASAP script lists to empty
- WaitingForLoadEventDelay: waits for nothing to delay the load event

Notification triggers re-evaluate the state machine when conditions
change: HTMLScriptElement::mark_as_ready, stylesheet unblocking in
StyleElementBase/HTMLLinkElement, did_stop_being_active_document, and
DocumentLoadEventDelayer decrements. NavigableContainer state changes
(session history readiness, content navigable cleared, lazy load flag)
also trigger re-evaluation of the load event delay check.

Key design decisions and why:

1. Microtask checkpoint in schedule_progress_check(): The old spin_until
   called perform_a_microtask_checkpoint() before checking conditions.
   This is critical because HTMLImageElement::update_the_image_data step
   8 queues a microtask that creates the DocumentLoadEventDelayer.
   Without the checkpoint, check_progress() would see zero delayers and
   complete before images start delaying the load event.

2. deferred_invoke in schedule_progress_check():
   I tried Core::Timer (0ms), queue_global_task, and synchronous calls.
   Timers caused non-deterministic ordering with the HTML event loop's
   task processing timer, leading to image layout tests failing (wrong
   subtest pass/fail patterns). Synchronous calls fired too early during
   image load processing before dimensions were set, causing 0-height
   images in layout tests. queue_global_task had task ordering issues
   with the session history traversal queue. deferred_invoke runs after
   the current callback returns but within the same event loop pump,
   giving the right balance.

3. Navigation load event guard (m_navigation_load_event_guard): During
   cross-document navigation, finalize_a_cross_document_navigation step
   2 calls set_delaying_load_events(false) before the session history
   traversal activates the new document. This creates a transient state
   where the parent's load event delay check sees the about:blank (which
   has ready_for_post_load_tasks=true) as the active document and
   completes prematurely.

											
										
										
											2026-03-28 09:39:51 +01:00
+								            document->detach_parser();
-												LibWeb: Wait until ReadyState is complete before detaching HTML parser

Previously, the DOM complete time was never being set, as the HTML
parser was detached before `DocumentReadyState` was set to complete.

											
										
										
											2025-01-06 11:09:47 +00:00
-												LibWeb: Implement more of HTMLParser::the_end() and bring closer to spec

											
										
										
											2021-09-26 00:51:02 +02:00
+								        // 2. If the Document object's browsing context is null, then abort these steps.
 								        if (!document->browsing_context())
 								            return;
-												LibWeb: Add the PageTransitionEvent interface and fire "pageshow" events

We now fire "pageshow" events at the appropriate time during document
loading (done by the parser.)

Note that there are no corresponding "pagehide" events yet.

											
										
										
											2021-09-26 12:39:27 +02:00
+								        // 3. Let window be the Document's relevant global object.
-												AK+Everywhere: Rename `verify_cast` to `as`

Follow-up to fc20e61e7249006247b43f84a3189d5a37fa103e.

											
										
										
											2025-01-21 09:12:05 -05:00
+								        auto& window = as<Window>(relevant_global_object(*document));
-												LibWeb: Implement more of HTMLParser::the_end() and bring closer to spec

											
										
										
											2021-09-26 00:51:02 +02:00
-												LibWeb: Save begin/end timestamps for load and DOMContentLoaded events

											
										
										
											2022-09-21 00:43:38 +02:00
+								        // 4. Set the Document's load timing info's load event start time to the current high resolution time given window.
-												LibWeb: Use "current high resolution time" AO where relevant

And updating some spec comments to latest spec where it is not relevant.

											
										
										
											2024-04-11 22:42:35 +02:00
+								        document->load_timing_info().load_event_start_time = HighResolutionTime::current_high_resolution_time(window);
-												LibWeb: Implement more of HTMLParser::the_end() and bring closer to spec

											
										
										
											2021-09-26 00:51:02 +02:00
 								        // 5. Fire an event named load at window, with legacy target override flag set.
 								        // FIXME: The legacy target override flag is currently set by a virtual override of dispatch_event()
 								        //        We should reorganize this so that the flag appears explicitly here instead.
-												LibWeb: Ignore window-forwarded document.body.onfoo in detached DOM

Normally, assigning to e.g. document.body.onload will forward to
window.onload. However, in a detached DOM tree, there is no associated
window, so we have nowhere to forward to, making this a no-op.

The bulk of this change is making Document::window() return a nullable
pointer, as documents created by DOMParser or DOMImplementation do not
have an associated window object, and so must be able to return null
from here.

											
										
										
											2024-03-10 08:41:18 +01:00
+								        window.dispatch_event(DOM::Event::create(document->realm(), HTML::EventNames::load));
-												LibWeb: Implement more of HTMLParser::the_end() and bring closer to spec

											
										
										
											2021-09-26 00:51:02 +02:00
 								        // FIXME: 6. Invoke WebDriver BiDi load complete with the Document's browsing context, and a new WebDriver BiDi navigation status whose id is the Document object's navigation id, status is "complete", and url is the Document object's URL.
 								        // FIXME: 7. Set the Document object's navigation id to null.
-												LibWeb: Save begin/end timestamps for load and DOMContentLoaded events

											
										
										
											2022-09-21 00:43:38 +02:00
+								        // 8. Set the Document's load timing info's load event end time to the current high resolution time given window.
-												LibWeb: Use "current high resolution time" AO where relevant

And updating some spec comments to latest spec where it is not relevant.

											
										
										
											2024-04-11 22:42:35 +02:00
+								        document->load_timing_info().load_event_end_time = HighResolutionTime::current_high_resolution_time(window);
-												LibWeb: Implement more of HTMLParser::the_end() and bring closer to spec

											
										
										
											2021-09-26 00:51:02 +02:00
-												LibWeb: Add a "page showing" flag to documents

This will be used to determine whether "pageshow" and "pagehide" events
are appropriate. We won't actually make use of it until we implement
more of history traversal and document unloading.

											
										
										
											2021-09-26 12:26:39 +02:00
+								        // 9. Assert: Document's page showing is false.
 								        VERIFY(!document->page_showing());
-												LibWeb: Implement more of HTMLParser::the_end() and bring closer to spec

											
										
										
											2021-09-26 00:51:02 +02:00
-												LibWeb/HTML: Remove "flag" word from usage of "page showing"

Corresponds to https://github.com/whatwg/html/commit/30935f34744194dda14fae589935b99d2eb3362f

											
										
										
											2025-03-14 16:35:19 +00:00
+								        // 10. Set the Document's page showing to true.
-												LibWeb: Add a "page showing" flag to documents

This will be used to determine whether "pageshow" and "pagehide" events
are appropriate. We won't actually make use of it until we implement
more of history traversal and document unloading.

											
										
										
											2021-09-26 12:26:39 +02:00
+								        document->set_page_showing(true);
-												LibWeb: Implement more of HTMLParser::the_end() and bring closer to spec

											
										
										
											2021-09-26 00:51:02 +02:00
-												LibWeb: Add the PageTransitionEvent interface and fire "pageshow" events

We now fire "pageshow" events at the appropriate time during document
loading (done by the parser.)

Note that there are no corresponding "pagehide" events yet.

											
										
										
											2021-09-26 12:39:27 +02:00
+								        // 11. Fire a page transition event named pageshow at window with false.
-												LibWeb: Ignore window-forwarded document.body.onfoo in detached DOM

Normally, assigning to e.g. document.body.onload will forward to
window.onload. However, in a detached DOM tree, there is no associated
window, so we have nowhere to forward to, making this a no-op.

The bulk of this change is making Document::window() return a nullable
pointer, as documents created by DOMParser or DOMImplementation do not
have an associated window object, and so must be able to return null
from here.

											
										
										
											2024-03-10 08:41:18 +01:00
+								        window.fire_a_page_transition_event(HTML::EventNames::pageshow, false);
-												LibWeb: Implement more of HTMLParser::the_end() and bring closer to spec

											
										
										
											2021-09-26 00:51:02 +02:00
 								        // 12. Completely finish loading the Document.
 								        document->completely_finish_loading();
 								        // FIXME: 13. Queue the navigation timing entry for the Document.
-												LibWeb: Let queue_global_task() take a JS::HeapFunction

Changes the signature of queue_global_task() from AK::Function to
JS::HeapFunction to be more clear to the user of the function that this
is what it uses internally.

											
										
										
											2024-04-16 22:04:01 +02:00
+								    }));
-												LibWeb: Make event dispatching spec-compliant

Specification: https://dom.spec.whatwg.org/#concept-event-dispatch

This also introduces shadow roots due to it being a requirement of
the event dispatcher.

However, it does not introduce the full shadow DOM; that can be
left for future work.

This changes some event dispatches which require certain attributes
to be initialised to a value.

											
										
										
											2020-11-21 18:32:39 +00:00
-												LibWeb: Implement more of HTMLParser::the_end() and bring closer to spec

											
										
										
											2021-09-26 00:51:02 +02:00
+								    // FIXME: 10. If the Document's print when loaded flag is set, then run the printing steps.
-												LibWeb: Make event dispatching spec-compliant

Specification: https://dom.spec.whatwg.org/#concept-event-dispatch

This also introduces shadow roots due to it being a requirement of
the event dispatcher.

However, it does not introduce the full shadow DOM; that can be
left for future work.

This changes some event dispatches which require certain attributes
to be initialised to a value.

											
										
										
											2020-11-21 18:32:39 +00:00
-												LibWeb: Implement more of HTMLParser::the_end() and bring closer to spec

											
										
										
											2021-09-26 00:51:02 +02:00
+								    // 11. The Document is now ready for post-load tasks.
-												LibWeb: Replace spin_until in HTMLParser::the_end() with state machine

HTMLParser::the_end() had three spin_until calls that blocked the event
loop: step 5 (deferred scripts), step 7 (ASAP scripts), and step 8
(load event delay). This replaces them with an HTMLParserEndState state
machine that progresses asynchronously via callbacks.

The state machine has three phases matching the three spin_until calls:
- WaitingForDeferredScripts: loops executing ready deferred scripts
- WaitingForASAPScripts: waits for ASAP script lists to empty
- WaitingForLoadEventDelay: waits for nothing to delay the load event

Notification triggers re-evaluate the state machine when conditions
change: HTMLScriptElement::mark_as_ready, stylesheet unblocking in
StyleElementBase/HTMLLinkElement, did_stop_being_active_document, and
DocumentLoadEventDelayer decrements. NavigableContainer state changes
(session history readiness, content navigable cleared, lazy load flag)
also trigger re-evaluation of the load event delay check.

Key design decisions and why:

1. Microtask checkpoint in schedule_progress_check(): The old spin_until
   called perform_a_microtask_checkpoint() before checking conditions.
   This is critical because HTMLImageElement::update_the_image_data step
   8 queues a microtask that creates the DocumentLoadEventDelayer.
   Without the checkpoint, check_progress() would see zero delayers and
   complete before images start delaying the load event.

2. deferred_invoke in schedule_progress_check():
   I tried Core::Timer (0ms), queue_global_task, and synchronous calls.
   Timers caused non-deterministic ordering with the HTML event loop's
   task processing timer, leading to image layout tests failing (wrong
   subtest pass/fail patterns). Synchronous calls fired too early during
   image load processing before dimensions were set, causing 0-height
   images in layout tests. queue_global_task had task ordering issues
   with the session history traversal queue. deferred_invoke runs after
   the current callback returns but within the same event loop pump,
   giving the right balance.

3. Navigation load event guard (m_navigation_load_event_guard): During
   cross-document navigation, finalize_a_cross_document_navigation step
   2 calls set_delaying_load_events(false) before the session history
   traversal activates the new document. This creates a transient state
   where the parent's load event delay check sees the about:blank (which
   has ready_for_post_load_tasks=true) as the active document and
   completes prematurely.

											
										
										
											2026-03-28 09:39:51 +01:00
+								    m_document->set_ready_for_post_load_tasks(true);
-												LibWeb: Start implementing character token parsing

Now that we've gotten rid of the misguided character buffering in the
tokenizer, it actually spits out character tokens that we have to deal
with in the parser.

This patch implements enough to bring us back to speed with simple.html

											
										
										
											2020-05-24 19:51:50 +02:00
+								}
-												LibWeb: Properly append attributes to element when creating an Element

The main behavioural difference here is that the full qualified name is
appended to the element, rather than just the local name and value.

											
										
										
											2023-10-04 17:45:48 +13:00
+								// https://html.spec.whatwg.org/multipage/parsing.html#create-an-element-for-the-token
-												LibGC+Everywhere: Factor out a LibGC from LibJS

Resulting in a massive rename across almost everywhere! Alongside the
namespace change, we now have the following names:

 * JS::NonnullGCPtr -> GC::Ref
 * JS::GCPtr -> GC::Ptr
 * JS::HeapFunction -> GC::Function
 * JS::CellImpl -> GC::Cell
 * JS::Handle -> GC::Root

											
										
										
											2024-11-15 04:01:23 +13:00
+								GC::Ref<DOM::Element> HTMLParser::create_element_for(HTMLToken const& token, Optional<FlyString> const& namespace_, DOM::Node& intended_parent)
-												LibWeb: Start building the tree building part of the new HTML parser

This patch adds a new HTMLDocumentParser class. It keeps a tokenizer
object internally and feeds itself with one token at a time from it.

The names and idioms in this class are expressed as closely to the
actual HTML parsing spec as possible, to make development as easy
and bug free as possible. :^)

This is going to become pretty large, but it's pretty cool!

											
										
										
											2020-05-24 00:14:23 +02:00
+								{
-												LibWeb: Implement the speculative HTML parser

When the HTML parser blocks on a synchronous external script, run a
separate tokenizer over the unparsed input and issue speculative fetches
for the resources it finds (script src, link rel=stylesheet|preload, img
src), with <base href> tracking and template/foreign-content skipping.

Also fills in the previously-stubbed "consume a preloaded resource"
algorithm and the document's "map of preloaded resources", so that
<link rel="preload"> followed by a matching consumer deduplicates to
a single fetch.

											
										
										
											2026-04-26 03:21:39 +02:00
+								    // 1. If the active speculative HTML parser is not null, then return the result of creating a speculative mock element given namespace, token's tag name, and token's attributes.
 								    // The active speculative HTML parser runs synchronously to completion, so it is null whenever the real
 								    // parser invokes this algorithm. The speculative parser produces mock elements via its own path.
 								    // 2. Otherwise, optionally create a speculative mock element given namespace, token's tag name, and token's attributes.
 								    // We deliberately skip step 2 — the active speculative parser already issues these fetches, so doing it
 								    // again here would be redundant.
-												LibWeb: Associate form elements with a form in parsing and dynamically

This makes it available for all form associated elements and not just
select and input elements. It also makes it more spec compliant,
especially around the form attribute.

The main thing missing is re-associating form elements with a form
attribute when the form attribute changes or an element with an ID
is inserted/removed or has its ID changed.

											
										
										
											2022-03-01 21:10:48 +00:00
-												LibWeb/HTML: Update spec text in create_element_for()

No behaviour changes.

											
										
										
											2025-11-25 14:59:25 +00:00
+								    // 3. Let document be intendedParent's node document.
-												LibGC+Everywhere: Factor out a LibGC from LibJS

Resulting in a massive rename across almost everywhere! Alongside the
namespace change, we now have the following names:

 * JS::NonnullGCPtr -> GC::Ref
 * JS::GCPtr -> GC::Ptr
 * JS::HeapFunction -> GC::Function
 * JS::CellImpl -> GC::Cell
 * JS::Handle -> GC::Root

											
										
										
											2024-11-15 04:01:23 +13:00
+								    GC::Ref<DOM::Document> document = intended_parent.document();
-												LibWeb: Associate form elements with a form in parsing and dynamically

This makes it available for all form associated elements and not just
select and input elements. It also makes it more spec compliant,
especially around the form attribute.

The main thing missing is re-associating form elements with a form
attribute when the form attribute changes or an element with an ID
is inserted/removed or has its ID changed.

											
										
										
											2022-03-01 21:10:48 +00:00
-												LibWeb/HTML: Update spec text in create_element_for()

No behaviour changes.

											
										
										
											2025-11-25 14:59:25 +00:00
+								    // 4. Let localName be token's tag name.
-												LibWeb: Avoid copy of local_name in HTMLParser::create_element_for

											
										
										
											2023-10-04 17:50:52 +13:00
+								    auto const& local_name = token.tag_name();
-												LibWeb: Associate form elements with a form in parsing and dynamically

This makes it available for all form associated elements and not just
select and input elements. It also makes it more spec compliant,
especially around the form attribute.

The main thing missing is re-associating form elements with a form
attribute when the form attribute changes or an element with an ID
is inserted/removed or has its ID changed.

											
										
										
											2022-03-01 21:10:48 +00:00
-												LibWeb/HTML: Update spec text in create_element_for()

No behaviour changes.

											
										
										
											2025-11-25 14:59:25 +00:00
+								    // 5. Let is be the value of the "is" attribute in token, if such an attribute exists; otherwise null.
-												LibWeb: Return an Optional<String> from HTMLToken::attribute

Move away from using a nullable StringView.

											
										
										
											2023-11-11 09:27:43 +13:00
+								    auto is_value = token.attribute(AttributeNames::is);
-												LibWeb: Introduce CustomElementRegistry and creating custom elements

The main missing feature here is form associated custom elements.

											
										
										
											2023-03-29 23:46:18 +01:00
-												LibWeb: Implement scoped custom element registries

											
										
										
											2026-02-27 17:05:47 +00:00
+								    // 6. Let registry be the result of looking up a custom element registry given intendedParent.
 								    auto registry = look_up_a_custom_element_registry(intended_parent);
-												LibWeb: Introduce CustomElementRegistry and creating custom elements

The main missing feature here is form associated custom elements.

											
										
										
											2023-03-29 23:46:18 +01:00
-												LibWeb: Implement scoped custom element registries

											
										
										
											2026-02-27 17:05:47 +00:00
+								    // 7. Let definition be the result of looking up a custom element definition given registry, namespace, localName,
 								    //    and is.
 								    auto definition = look_up_a_custom_element_definition(registry, namespace_, local_name, is_value);
 								    // 8. Let willExecuteScript be true if definition is non-null and the parser was not created as part of the HTML
 								    //    fragment parsing algorithm; otherwise false.
-												LibWeb: Introduce CustomElementRegistry and creating custom elements

The main missing feature here is form associated custom elements.

											
										
										
											2023-03-29 23:46:18 +01:00
+								    bool will_execute_script = definition && !m_parsing_fragment;
-												LibWeb/HTML: Update spec text in create_element_for()

No behaviour changes.

											
										
										
											2025-11-25 14:59:25 +00:00
+								    // 9. If willExecuteScript is true:
-												LibWeb: Introduce CustomElementRegistry and creating custom elements

The main missing feature here is form associated custom elements.

											
										
										
											2023-03-29 23:46:18 +01:00
+								    if (will_execute_script) {
 								        // 1. Increment document's throw-on-dynamic-markup-insertion counter.
 								        document->increment_throw_on_dynamic_markup_insertion_counter({});
 								        // 2. If the JavaScript execution context stack is empty, then perform a microtask checkpoint.
 								        auto& vm = main_thread_event_loop().vm();
-												LibWeb: Use VM helpers for execution context access

Inline JS-to-JS frames no longer live in the raw execution context
vector, so LibWeb callers that need to inspect or pop contexts now go
through VM helpers instead of peeking into that storage directly.

This keeps the execution context bookkeeping encapsulated while
preserving existing microtask and realm-entry checks.

											
										
										
											2026-04-13 12:49:53 +02:00
+								        if (!vm.has_running_execution_context())
-												LibWeb: Introduce CustomElementRegistry and creating custom elements

The main missing feature here is form associated custom elements.

											
										
										
											2023-03-29 23:46:18 +01:00
+								            perform_a_microtask_checkpoint();
 								        // 3. Push a new element queue onto document's relevant agent's custom element reactions stack.
-												LibWeb: Split out SimilarOriginWindowAgent from HTML::Agent

To allow for adding the concept of a WorkerAgent to be reused
between shared and dedicated workers. An event loop is the
commonality between the different agent types, though, there
are some differences between those event loops which we customize
on the construction of the HTML::EventLoop.

											
										
										
											2025-04-24 15:04:13 +12:00
+								        relevant_similar_origin_window_agent(document).custom_element_reactions_stack.element_queue_stack.append({});
-												LibWeb: Introduce CustomElementRegistry and creating custom elements

The main missing feature here is form associated custom elements.

											
										
										
											2023-03-29 23:46:18 +01:00
+								    }
-												LibWeb: Associate form elements with a form in parsing and dynamically

This makes it available for all form associated elements and not just
select and input elements. It also makes it more spec compliant,
especially around the form attribute.

The main thing missing is re-associating form elements with a form
attribute when the form attribute changes or an element with an ID
is inserted/removed or has its ID changed.

											
										
										
											2022-03-01 21:10:48 +00:00
-												LibWeb: Implement scoped custom element registries

											
										
										
											2026-02-27 17:05:47 +00:00
+								    // 10. Let element be the result of creating an element given document, localName, namespace, null, is,
 								    //     willExecuteScript, and registry.
 								    auto element = create_element(*document, local_name, namespace_, {}, is_value, will_execute_script, registry).release_value_but_fixme_should_propagate_errors();
-												LibWeb: Associate form elements with a form in parsing and dynamically

This makes it available for all form associated elements and not just
select and input elements. It also makes it more spec compliant,
especially around the form attribute.

The main thing missing is re-associating form elements with a form
attribute when the form attribute changes or an element with an ID
is inserted/removed or has its ID changed.

											
										
										
											2022-03-01 21:10:48 +00:00
-												LibWeb: Track if element was created from token with dupe attributes

This is required for CSP to ignore the nonce attribute to prevent
duplicate attributes hijacking the attribute.

See https://w3c.github.io/webappsec-csp/#security-nonce-hijacking
											
										
										
											2024-12-02 12:33:52 +00:00
+								    // AD-HOC: See AD-HOC comment on Element.m_had_duplicate_attribute_during_tokenization about why this is done.
 								    if (token.had_duplicate_attribute()) {
 								        element->set_had_duplicate_attribute_during_tokenization({});
 								    }
-												LibWeb: Block rendering until linked stylesheets are loaded

This commit implements the main "render blocking" behavior for link
elements, drastically reducing the amount of FOUC (flash of unstyled
content) we subject our users to.

The document will now block rendering until linked style sheets
referenced by parser-created link elements have loaded (or failed).

Note that we don't yet extend the blocking period until "critical
subresources" such as imported style sheets have been downloaded
as well.

											
										
										
											2025-02-27 15:30:26 +01:00
+								    // AD-HOC: Let <link> elements know which document they were originally parsed for.
 								    //         This is used for the render-blocking logic.
 								    if (local_name == HTML::TagNames::link && namespace_ == Namespace::HTML) {
-												LibWeb: Track whether HTMLLinkElement was enabled when created by parser

This information is needed by the script-blocking style sheet logic, and
its absence was causing a WPT test to crash.

											
										
										
											2025-04-24 12:36:54 +02:00
+								        auto& link_element = as<HTMLLinkElement>(*element);
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								        link_element.set_parser_document({}, document);
 								        link_element.set_was_enabled_when_created_by_parser({}, !token.has_attribute(HTML::AttributeNames::disabled));
-												LibWeb: Update parser with more insertion modes :^)

Implements handling of InHeadNoScript, InSelectInTable, InTemplate,
InFrameset, AfterFrameset, and AfterAfterFrameset.

											
										
										
											2020-06-21 06:58:03 +02:00
+								    }
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    // 11. Append each attribute in the given token to element.
 								    token.for_each_attribute([&](auto const& attribute) {
 								        DOM::QualifiedName qualified_name { attribute.local_name, attribute.prefix, attribute.namespace_ };
 								        auto dom_attribute = realm().create<DOM::Attr>(*document, move(qualified_name), attribute.value, element);
 								        element->append_attribute(dom_attribute);
 								        return IterationDecision::Continue;
 								    });
-												LibWeb: Update parser with more insertion modes :^)

Implements handling of InHeadNoScript, InSelectInTable, InTemplate,
InFrameset, AfterFrameset, and AfterAfterFrameset.

											
										
										
											2020-06-21 06:58:03 +02:00
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    // AD-HOC: The muted attribute on media elements is only set if the muted content attribute is present when the element is first created.
 								    if (element->is_html_media_element() && namespace_ == Namespace::HTML) {
 								        // https://html.spec.whatwg.org/multipage/media.html#user-interface:attr-media-muted
 								        // When a media element is created, if the element has a muted content attribute specified, then the muted IDL
 								        // attribute should be set to true; otherwise, the user agents may set the value to the user's preferred value.
 								        if (element->has_attribute(HTML::AttributeNames::muted)) {
 								            auto& media_element = as<HTMLMediaElement>(*element);
 								            media_element.set_muted(true);
-												LibWeb: Update parser with more insertion modes :^)

Implements handling of InHeadNoScript, InSelectInTable, InTemplate,
InFrameset, AfterFrameset, and AfterAfterFrameset.

											
										
										
											2020-06-21 06:58:03 +02:00
+								        }
 								    }
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    // 12. If willExecuteScript is true:
 								    if (will_execute_script) {
 								        // 1. Let queue be the result of popping from document's relevant agent's custom element reactions stack.
 								        //    (This will be the same element queue as was pushed above.)
 								        auto queue = relevant_similar_origin_window_agent(document).custom_element_reactions_stack.element_queue_stack.take_last();
-												LibWeb: Flesh out the "in frameset" parsing state and add spec comments

This also implements two FIXMEs, which were covered by WPT tests.

											
										
										
											2024-11-03 11:20:07 +01:00
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								        // 2. Invoke custom element reactions in queue.
 								        Bindings::invoke_custom_element_reactions(queue);
-												LibWeb: Flesh out the "in frameset" parsing state and add spec comments

This also implements two FIXMEs, which were covered by WPT tests.

											
										
										
											2024-11-03 11:20:07 +01:00
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								        // 3. Decrement document's throw-on-dynamic-markup-insertion counter.
 								        document->decrement_throw_on_dynamic_markup_insertion_counter({});
-												LibWeb: Update parser with more insertion modes :^)

Implements handling of InHeadNoScript, InSelectInTable, InTemplate,
InFrameset, AfterFrameset, and AfterAfterFrameset.

											
										
										
											2020-06-21 06:58:03 +02:00
+								    }
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    // FIXME: 13. If element has an xmlns attribute in the XMLNS namespace whose value is not exactly the same as the element's namespace, that is a parse error.
 								    //            Similarly, if element has an xmlns:xlink attribute in the XMLNS namespace whose value is not the XLink Namespace, that is a parse error.
-												LibWeb: Update parser with more insertion modes :^)

Implements handling of InHeadNoScript, InSelectInTable, InTemplate,
InFrameset, AfterFrameset, and AfterAfterFrameset.

											
										
										
											2020-06-21 06:58:03 +02:00
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    if (auto* html_element = as_if<HTML::HTMLElement>(*element)) {
 								        if (html_element->is_form_associated_element() && !html_element->is_form_associated_custom_element()) {
 								            // 14. If element is a resettable element and not a form-associated custom element, then invoke its reset algorithm.
 								            //     (This initializes the element's value and checkedness based on the element's attributes.)
 								            if (html_element->is_resettable())
 								                html_element->reset_algorithm();
-												LibWeb: Flesh out the "in frameset" parsing state and add spec comments

This also implements two FIXMEs, which were covered by WPT tests.

											
										
										
											2024-11-03 11:20:07 +01:00
+								        }
-												LibWeb: Update parser with more insertion modes :^)

Implements handling of InHeadNoScript, InSelectInTable, InTemplate,
InFrameset, AfterFrameset, and AfterAfterFrameset.

											
										
										
											2020-06-21 06:58:03 +02:00
+								    }
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    // 16. Return element.
 								    return element;
-												LibWeb: Update parser with more insertion modes :^)

Implements handling of InHeadNoScript, InSelectInTable, InTemplate,
InFrameset, AfterFrameset, and AfterAfterFrameset.

											
										
										
											2020-06-21 06:58:03 +02:00
+								}
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								void HTMLParser::schedule_resume_check()
-												LibWeb: Update parser with more insertion modes :^)

Implements handling of InHeadNoScript, InSelectInTable, InTemplate,
InFrameset, AfterFrameset, and AfterAfterFrameset.

											
										
										
											2020-06-21 06:58:03 +02:00
+								{
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    if (m_resume_check_pending)
-												LibWeb: Update parser with more insertion modes :^)

Implements handling of InHeadNoScript, InSelectInTable, InTemplate,
InFrameset, AfterFrameset, and AfterAfterFrameset.

											
										
										
											2020-06-21 06:58:03 +02:00
+								        return;
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    if (!m_parser_pause_flag)
-												LibWeb: Update parser with more insertion modes :^)

Implements handling of InHeadNoScript, InSelectInTable, InTemplate,
InFrameset, AfterFrameset, and AfterAfterFrameset.

											
										
										
											2020-06-21 06:58:03 +02:00
+								        return;
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    m_resume_check_pending = true;
 								    Platform::EventLoopPlugin::the().deferred_invoke(GC::create_function(heap(), [this] {
 								        m_resume_check_pending = false;
 								        perform_pre_progress_microtask_checkpoint();
 								        resume_after_parser_blocking_script();
 								    }));
-												LibWeb: Update parser with more insertion modes :^)

Implements handling of InHeadNoScript, InSelectInTable, InTemplate,
InFrameset, AfterFrameset, and AfterAfterFrameset.

											
										
										
											2020-06-21 06:58:03 +02:00
+								}
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-incdata
 								// Async equivalent of "spin the event loop until ... ready to be parser-executed" from the per-iteration block of the
 								// "text" insertion mode (steps 4-13). Driven by schedule_resume_check.
 								void HTMLParser::resume_after_parser_blocking_script()
-												LibWeb: Update parser with more insertion modes :^)

Implements handling of InHeadNoScript, InSelectInTable, InTemplate,
InFrameset, AfterFrameset, and AfterAfterFrameset.

											
										
										
											2020-06-21 06:58:03 +02:00
+								{
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    if (!m_parser_pause_flag)
-												LibWeb: Update parser with more insertion modes :^)

Implements handling of InHeadNoScript, InSelectInTable, InTemplate,
InFrameset, AfterFrameset, and AfterAfterFrameset.

											
										
										
											2020-06-21 06:58:03 +02:00
+								        return;
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    if (m_aborted || m_stop_parsing)
-												LibWeb: Update parser with more insertion modes :^)

Implements handling of InHeadNoScript, InSelectInTable, InTemplate,
InFrameset, AfterFrameset, and AfterAfterFrameset.

											
										
										
											2020-06-21 06:58:03 +02:00
+								        return;
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    auto pending = document().pending_parsing_blocking_script();
 								    auto pending_svg = document().pending_parsing_blocking_svg_script();
 								    bool ready = false;
 								    if (pending)
 								        ready = pending->is_ready_to_be_parser_executed();
 								    else if (pending_svg)
 								        ready = pending_svg->is_ready_to_be_parser_executed();
 								    else
-												LibWeb: Update parser with more insertion modes :^)

Implements handling of InHeadNoScript, InSelectInTable, InTemplate,
InFrameset, AfterFrameset, and AfterAfterFrameset.

											
										
										
											2020-06-21 06:58:03 +02:00
+								        return;
-												LibWeb: Add spec comments to 'process the rules for foreign content'

											
										
										
											2023-09-23 08:47:13 +12:00
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    // 5. If the parser's Document has a style sheet that is blocking scripts or the script's ready to be
 								    //    parser-executed is false: spin the event loop until the parser's Document has no style sheet that is blocking
 								    //    scripts and the script's ready to be parser-executed becomes true.
 								    // The async equivalent: return without taking the script; schedule_resume_check re-fires this method when the
 								    // relevant state changes.
 								    if (m_document->has_a_style_sheet_that_is_blocking_scripts())
-												LibWeb: Add initial implementation of foreign content parsing

Plus sneak in a FIXME for the list of active formatting elements
and a test for Element.namespaceURI
											
										
										
											2020-10-12 01:51:28 +01:00
+								        return;
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    if (!ready)
-												LibWeb: Add initial implementation of foreign content parsing

Plus sneak in a FIXME for the list of active formatting elements
and a test for Element.namespaceURI
											
										
										
											2020-10-12 01:51:28 +01:00
+								        return;
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    // 3. Start the speculative HTML parser for this instance of the HTML parser.
 								    // (Done at the pause point in the corresponding insertion-mode handler, so that speculation runs during the wait.)
-												LibWeb: Add initial implementation of foreign content parsing

Plus sneak in a FIXME for the list of active formatting elements
and a test for Element.namespaceURI
											
										
										
											2020-10-12 01:51:28 +01:00
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    // 4. Block the tokenizer for this instance of the HTML parser, such that the event loop will not run tasks that
 								    //    invoke the tokenizer.
 								    // (No-op: pausing is expressed by returning from run() and m_parser_pause_flag, not a tokenizer-level block flag.)
-												LibWeb: Add initial implementation of foreign content parsing

Plus sneak in a FIXME for the list of active formatting elements
and a test for Element.namespaceURI
											
										
										
											2020-10-12 01:51:28 +01:00
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    // 6. If this parser has been aborted in the meantime, return.
 								    if (m_aborted)
-												LibWeb: Improvements to error handling in HTML foreign content parsing

Follow the spec more closely when encountering an invalid start or end
tag during foreign content parsing.

											
										
										
											2021-09-14 23:49:45 +02:00
+								        return;
-												LibWeb: Add initial implementation of foreign content parsing

Plus sneak in a FIXME for the list of active formatting elements
and a test for Element.namespaceURI
											
										
										
											2020-10-12 01:51:28 +01:00
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    // 7. Stop the speculative HTML parser for this instance of the HTML parser.
 								    stop_the_speculative_html_parser();
-												LibWeb: Add spec comments to 'process the rules for foreign content'

											
										
										
											2023-09-23 08:47:13 +12:00
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    // 8. Unblock the tokenizer for this instance of the HTML parser, such that tasks that invoke the tokenizer can
 								    //    again be run. (No-op, see step 4.)
-												LibWeb: Add support for inline SVG element scripts

											
										
										
											2023-09-26 01:12:21 +13:00
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    // 9. Let the insertion point be just before the next input character.
 								    m_tokenizer.update_insertion_point();
-												LibWeb: Add initial implementation of foreign content parsing

Plus sneak in a FIXME for the list of active formatting elements
and a test for Element.namespaceURI
											
										
										
											2020-10-12 01:51:28 +01:00
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    // 10. Increment the parser's script nesting level by one (it should be zero before this step, so this sets it to
 								    //     one).
 								    VERIFY(script_nesting_level() == 0);
 								    increment_script_nesting_level();
-												LibWeb: Add initial implementation of foreign content parsing

Plus sneak in a FIXME for the list of active formatting elements
and a test for Element.namespaceURI
											
										
										
											2020-10-12 01:51:28 +01:00
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    // Step 8 unblocked the tokenizer above. Our async "spin the event loop" implementation uses the parser pause flag
 								    // to yield while waiting for the pending script, so clear it before executing the script. This allows
 								    // document.write() calls made by the script to synchronously re-enter the parser up to the insertion point.
 								    m_parser_pause_flag = false;
-												LibWeb: Add spec comments to 'process the rules for foreign content'

											
										
										
											2023-09-23 08:47:13 +12:00
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    // 1. Let the script be the pending parsing-blocking script.
 								    // 2. Set the pending parsing-blocking script to null.
 								    // 11. Execute the script element the script.
 								    if (pending)
 								        document().take_pending_parsing_blocking_script({})->execute_script();
 								    else
 								        document().take_pending_parsing_blocking_svg_script({})->execute_pending_parser_blocking_script({});
-												LibWeb: Add support for inline SVG element scripts

											
										
										
											2023-09-26 01:12:21 +13:00
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    // 12. Decrement the parser's script nesting level by one.
 								    decrement_script_nesting_level();
-												LibWeb: Add spec comments to 'process the rules for foreign content'

											
										
										
											2023-09-23 08:47:13 +12:00
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    // If the parser's script nesting level is zero (which it always should be at this point), then set the parser pause
 								    // flag to false.
 								    VERIFY(script_nesting_level() == 0);
 								    m_parser_pause_flag = false;
-												LibWeb: Add basic support for dynamic markup insertion

This implements basic support for dynamic markup insertion, adding
 * Document::open()
 * Document::write(Vector<String> const&)
 * Document::writeln(Vector<String> const&)
 * Document::close()

The HTMLParser is modified to make it possible to create a
script-created parser which initially only contains an HTMLTokenizer
without any data. Additionally, the HTMLParser::run method gains an
overload which does not modify the Document and does not run
HTMLParser::the_end() so that we can reenter the parser at a later time.
Furthermore, all FIXMEs that concern the insertion point, which is
defined in the HTMLTokenizer, are implemented. Additionally, the following
member variables of the HTMLParser are now exposed by getter functions:
 * m_tokenizer
 * m_aborted
 * m_script_nesting_level

The HTMLTokenizer is modified so that it contains an insertion
point which keeps track of where the next input from the Document::write
functions will be inserted. The insertion point is implemented as the
character offset into m_decoded_input and a boolean describing if the
insertion point is defined. Functions to update, check and {re}store the
insertion point are also added.
The function HTMLTokenizer::insert_eof is added to tell a script-created
parser that Document::close was called and HTMLParser::the_end() should
be called.
Lastly, an explicit default constructor is added to HTMLTokenizer to
create an empty HTMLTokenizer into which data can be inserted.

											
										
										
											2022-02-19 15:58:21 +01:00
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    // 13. Let the insertion point be undefined again.
 								    m_tokenizer.undefine_insertion_point();
-												LibWeb: Make external SVG script fetches async

Replace the spin_until in SVGScriptElement::process_the_script_element
with an async fetch that mirrors HTMLScriptElement's mark_as_ready
pattern. External SVG scripts now fetch and execute asynchronously,
matching Chromium's behavior.

For HTML-embedded SVG scripts, the parser pauses via the existing
schedule_resume_check infrastructure, extended to support SVG scripts
through a new pending_parsing_blocking_svg_script slot on Document.
For top-level XML/SVG documents, scripts execute when their fetch
completes; the load event is delayed via DocumentLoadEventDelayer which
the existing XMLDocumentBuilder::document_end already waits on.

											
										
										
											2026-04-26 21:29:05 +02:00
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    // The spec's loop would handle the next pending parsing-blocking script before continuing normal tokenization.
 								    // In this async implementation, pause again and resume when that next script is ready.
 								    if (document().has_pending_parsing_blocking_script()) {
 								        m_parser_pause_flag = true;
 								        schedule_resume_check();
-												LibWeb: Do not crash when parsing a SVG script element

Just leave a FIXME dbgln message instead. This works around a crash seen
in html5test.com.

											
										
										
											2023-09-23 10:52:12 +12:00
+								        return;
-												LibWeb: Add initial implementation of foreign content parsing

Plus sneak in a FIXME for the list of active formatting elements
and a test for Element.namespaceURI
											
										
										
											2020-10-12 01:51:28 +01:00
+								    }
-												LibWeb: Do not crash when parsing a SVG script element

Just leave a FIXME dbgln message instead. This works around a crash seen
in html5test.com.

											
										
										
											2023-09-23 10:52:12 +12:00
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    // The spec's "While the pending parsing-blocking script is not null" iteration is realized by run() pausing again
 								    // on the next </script> end tag if the executed script set up a new pending blocking script (e.g. via
 								    // document.write).
 								    run();
-												LibWeb: Fully implement end tag parsing in foreign content

Required to view the Spotify home page
											
										
										
											2021-01-02 23:02:23 +00:00
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    if (m_parser_pause_flag)
 								        return;
-												LibWeb: Add initial implementation of foreign content parsing

Plus sneak in a FIXME for the list of active formatting elements
and a test for Element.namespaceURI
											
										
										
											2020-10-12 01:51:28 +01:00
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    invoke_post_parse_action();
-												LibWeb: Add initial implementation of foreign content parsing

Plus sneak in a FIXME for the list of active formatting elements
and a test for Element.namespaceURI
											
										
										
											2020-10-12 01:51:28 +01:00
+								}
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								void HTMLParser::invoke_post_parse_action()
-												LibWeb: Start fleshing out the "in table" parser insertion mode

											
										
										
											2020-05-25 20:30:34 +02:00
+								{
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    if (auto action = exchange(m_post_parse_action, nullptr))
 								        action();
 								}
-												LibWeb: Flesh out "reset the insertion mode appropriately" algorithm

											
										
										
											2020-05-28 00:26:33 +02:00
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								void HTMLParser::increment_script_nesting_level()
 								{
 								    ++m_script_nesting_level;
-												LibWeb: Start fleshing out the "in table" parser insertion mode

											
										
										
											2020-05-25 20:30:34 +02:00
+								}
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								void HTMLParser::decrement_script_nesting_level()
-												LibWeb: Start building the tree building part of the new HTML parser

This patch adds a new HTMLDocumentParser class. It keeps a tokenizer
object internally and feeds itself with one token at a time from it.

The names and idioms in this class are expressed as closely to the
actual HTML parsing spec as possible, to make development as easy
and bug free as possible. :^)

This is going to become pretty large, but it's pretty cool!

											
										
										
											2020-05-24 00:14:23 +02:00
+								{
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								    VERIFY(m_script_nesting_level);
 								    --m_script_nesting_level;
-												LibWeb: Start building the tree building part of the new HTML parser

This patch adds a new HTMLDocumentParser class. It keeps a tokenizer
object internally and feeds itself with one token at a time from it.

The names and idioms in this class are expressed as closely to the
actual HTML parsing spec as possible, to make development as easy
and bug free as possible. :^)

This is going to become pretty large, but it's pretty cool!

											
										
										
											2020-05-24 00:14:23 +02:00
+								}
-												LibWeb: Rename HTMLDocumentParser => HTMLParser

											
										
										
											2021-09-25 23:15:48 +02:00
+								DOM::Document& HTMLParser::document()
-												LibWeb: Start building the tree building part of the new HTML parser

This patch adds a new HTMLDocumentParser class. It keeps a tokenizer
object internally and feeds itself with one token at a time from it.

The names and idioms in this class are expressed as closely to the
actual HTML parsing spec as possible, to make development as easy
and bug free as possible. :^)

This is going to become pretty large, but it's pretty cool!

											
										
										
											2020-05-24 00:14:23 +02:00
+								{
 								    return *m_document;
 								}
-												LibWeb: Implement fragment parsing and use it for Element.innerHTML

This patch implements most of the HTML fragment parsing algorithm and
ports Element::set_inner_html() to it. This was the last remaining user
of the old HTML parser. :^)

											
										
										
											2020-06-25 23:42:08 +02:00
-												LibWeb: Annotate and simplify the HTML fragment parsing algorithm

This patch adds inline spec comments, and then adjusts the code a bit
so it reads more like the spec.

											
										
										
											2022-10-29 12:48:35 +02:00
+								// https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments
-												LibWeb: Set fragment scripting mode from the context document

This corresponds with the editorial change to the HTML standard
introducing the parsing mode enum of:

https://github.com/whatwg/html/commit/01c45cede

And a follow up normative change of:

https://github.com/whatwg/html/commit/508706c80

Making fragment parsing derive its scripting mode from the context
document.

											
										
										
											2026-04-12 13:31:30 +02:00
+								WebIDL::ExceptionOr<Vector<GC::Root<DOM::Node>>> HTMLParser::parse_html_fragment(DOM::Element& context_element, StringView markup, AllowDeclarativeShadowRoots allow_declarative_shadow_roots, ParserScriptingMode scripting_mode)
-												LibWeb: Implement fragment parsing and use it for Element.innerHTML

This patch implements most of the HTML fragment parsing algorithm and
ports Element::set_inner_html() to it. This was the last remaining user
of the old HTML parser. :^)

											
										
										
											2020-06-25 23:42:08 +02:00
+								{
-												LibWeb: Set fragment scripting mode from the context document

This corresponds with the editorial change to the HTML standard
introducing the parsing mode enum of:

https://github.com/whatwg/html/commit/01c45cede

And a follow up normative change of:

https://github.com/whatwg/html/commit/508706c80

Making fragment parsing derive its scripting mode from the context
document.

											
										
										
											2026-04-12 13:31:30 +02:00
+								    // 1. Assert: scriptingMode is either Inert or Fragment.
 								    VERIFY(scripting_mode == HTML::ParserScriptingMode::Inert || scripting_mode == HTML::ParserScriptingMode::Fragment);
 								    // 2. Let document be a Document node whose type is "html".
-												LibWeb: Indicate documents are for fragment parsing during construction

This will allow testing if they are for fragment parsing during methods
invoked from Document::initialize.

											
										
										
											2024-07-31 12:02:38 -04:00
+								    auto temp_document = DOM::Document::create_for_fragment_parsing(context_element.realm());
-												LibWeb: Annotate and simplify the HTML fragment parsing algorithm

This patch adds inline spec comments, and then adjusts the code a bit
so it reads more like the spec.

											
										
										
											2022-10-29 12:48:35 +02:00
+								    temp_document->set_document_type(DOM::Document::Type::HTML);
-												LibWeb: Fix crash when setting innerHTML inside iframe srcdoc document

In particular, there was an assertion failure due to the temporary
parser document's "about base URL" being empty when trying to "parse a
URL" during parsing.

We fix this by copying the context element's document's about base URL
to the temporary parsing document while parsing a fragment.

This fixes a crash when loading search results on https://amazon.com/

											
										
										
											2024-08-28 16:22:44 +02:00
+								    // AD-HOC: We set the about base URL of the document to the same as the context element's document.
 								    //         This is required for Document::parse_url() to work inside iframe srcdoc documents.
-												LibWeb: Implement scoped custom element registries

											
										
										
											2026-02-27 17:05:47 +00:00
+								    //         Spec issue: https://github.com/whatwg/html/issues/12210
-												LibWeb: Fix crash when setting innerHTML inside iframe srcdoc document

In particular, there was an assertion failure due to the temporary
parser document's "about base URL" being empty when trying to "parse a
URL" during parsing.

We fix this by copying the context element's document's about base URL
to the temporary parsing document while parsing a fragment.

This fixes a crash when loading search results on https://amazon.com/

											
										
										
											2024-08-28 16:22:44 +02:00
+								    temp_document->set_about_base_url(context_element.document().about_base_url());
-												LibWeb: Set fragment scripting mode from the context document

This corresponds with the editorial change to the HTML standard
introducing the parsing mode enum of:

https://github.com/whatwg/html/commit/01c45cede

And a follow up normative change of:

https://github.com/whatwg/html/commit/508706c80

Making fragment parsing derive its scripting mode from the context
document.

											
										
										
											2026-04-12 13:31:30 +02:00
+								    // 3. Let contextDocument be context's node document.
 								    auto& context_document = context_element.document();
-												LibWeb: Annotate and simplify the HTML fragment parsing algorithm

This patch adds inline spec comments, and then adjusts the code a bit
so it reads more like the spec.

											
										
										
											2022-10-29 12:48:35 +02:00
-												LibWeb: Set fragment scripting mode from the context document

This corresponds with the editorial change to the HTML standard
introducing the parsing mode enum of:

https://github.com/whatwg/html/commit/01c45cede

And a follow up normative change of:

https://github.com/whatwg/html/commit/508706c80

Making fragment parsing derive its scripting mode from the context
document.

											
										
										
											2026-04-12 13:31:30 +02:00
+								    // 4. If contextDocument is in quirks mode, then set document's mode to "quirks".
 								    if (context_document.in_quirks_mode()) {
 								        temp_document->set_quirks_mode(DOM::QuirksMode::Yes);
 								    }
 								    // 5. Otherwise, if context's node document is in limited-quirks mode, then set document's mode to "limited-quirks".
 								    else if (context_element.document().in_limited_quirks_mode()) {
-												LibWeb: Bring fragment parsing up to spec

Corresponds to https://github.com/whatwg/html/pull/10874

Also, parse_fragment() returns ExceptionOr, so stop voiding the error
from append_child().

											
										
										
											2025-01-07 13:50:19 +00:00
+								        temp_document->set_quirks_mode(DOM::QuirksMode::Limited);
-												LibWeb: Set fragment scripting mode from the context document

This corresponds with the editorial change to the HTML standard
introducing the parsing mode enum of:

https://github.com/whatwg/html/commit/01c45cede

And a follow up normative change of:

https://github.com/whatwg/html/commit/508706c80

Making fragment parsing derive its scripting mode from the context
document.

											
										
										
											2026-04-12 13:31:30 +02:00
+								    }
-												LibWeb: Bring fragment parsing up to spec

Corresponds to https://github.com/whatwg/html/pull/10874

Also, parse_fragment() returns ExceptionOr, so stop voiding the error
from append_child().

											
										
										
											2025-01-07 13:50:19 +00:00
-												LibWeb: Set fragment scripting mode from the context document

This corresponds with the editorial change to the HTML standard
introducing the parsing mode enum of:

https://github.com/whatwg/html/commit/01c45cede

And a follow up normative change of:

https://github.com/whatwg/html/commit/508706c80

Making fragment parsing derive its scripting mode from the context
document.

											
										
										
											2026-04-12 13:31:30 +02:00
+								    // 6. If allowDeclarativeShadowRoots is true, then set document's allow declarative shadow roots to true.
-												LibWeb: Implement unsafe HTML parsing methods

Both Element's and ShadowRoot's setHTMLUnsafe, and Document's static
parseHTMLUnsafe methods are implemented.

											
										
										
											2024-06-25 20:55:58 +01:00
+								    if (allow_declarative_shadow_roots == AllowDeclarativeShadowRoots::Yes)
 								        temp_document->set_allow_declarative_shadow_roots(true);
-												LibWeb: Set fragment scripting mode from the context document

This corresponds with the editorial change to the HTML standard
introducing the parsing mode enum of:

https://github.com/whatwg/html/commit/01c45cede

And a follow up normative change of:

https://github.com/whatwg/html/commit/508706c80

Making fragment parsing derive its scripting mode from the context
document.

											
										
										
											2026-04-12 13:31:30 +02:00
+								    // 7. Create a new HTML parser, and associate it with document.
 								    // 8. If contextDocument's scripting is disabled, then set scriptingMode to Disabled.
 								    // 9. Set the parser's scripting mode to scriptingMode.
 								    if (context_element.document().is_scripting_disabled())
 								        scripting_mode = HTML::ParserScriptingMode::Disabled;
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
-												LibWeb: Make the Rust HTML parser unconditional

Remove the runtime selector between the old C++ tree builder and the new
Rust implementation. Always construct HTML documents and fragments with
the Rust parser now that it matches the existing tests.

Simplify dump-html-tree by dropping the backend option that only made
sense while both parser implementations were available.

											
										
										
											2026-05-16 13:35:37 +02:00
+								    auto parser = HTMLParser::create(*temp_document, markup, scripting_mode, "utf-8"sv);
-												LibWeb: Stop creating transient throwaway JS::Handles in HTML parser

These were being immediately stored in JS::GCPtrs (and dutifully visited
by HTMLParser), so creating temporary handles for them was a complete
waste of time.

											
										
										
											2024-07-20 14:38:32 +02:00
+								    parser->m_context_element = context_element;
-												LibWeb: Make document.write() work while document is parsing

This necessitated making HTMLParser ref-counted, and having it register
itself with Document when created. That makes it possible for scripts to
add new input at the current parser insertion point.

There is now a reference cycle between Document and HTMLParser. This
cycle is explicitly broken by calling Document::detach_parser() at the
end of HTMLParser::run().

This is a huge progression on ACID3, from 31% to 49%! :^)

											
										
										
											2022-02-21 21:54:21 +01:00
+								    parser->m_parsing_fragment = true;
-												LibWeb: Implement fragment parsing and use it for Element.innerHTML

This patch implements most of the HTML fragment parsing algorithm and
ports Element::set_inner_html() to it. This was the last remaining user
of the old HTML parser. :^)

											
										
										
											2020-06-25 23:42:08 +02:00
-												LibWeb: Set fragment scripting mode from the context document

This corresponds with the editorial change to the HTML standard
introducing the parsing mode enum of:

https://github.com/whatwg/html/commit/01c45cede

And a follow up normative change of:

https://github.com/whatwg/html/commit/508706c80

Making fragment parsing derive its scripting mode from the context
document.

											
										
										
											2026-04-12 13:31:30 +02:00
+								    // 10. Set the state of the HTML parser's tokenization stage as follows, switching on the context element:
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
+								    bool const context_element_is_html = context_element.namespace_uri() == Namespace::HTML;
-												LibWeb: Annotate and simplify the HTML fragment parsing algorithm

This patch adds inline spec comments, and then adjusts the code a bit
so it reads more like the spec.

											
										
										
											2022-10-29 12:48:35 +02:00
+								    // - title
 								    // - textarea
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
+								    if (context_element_is_html
 								        && context_element.local_name().is_one_of(HTML::TagNames::title, HTML::TagNames::textarea)) {
-												LibWeb: Annotate and simplify the HTML fragment parsing algorithm

This patch adds inline spec comments, and then adjusts the code a bit
so it reads more like the spec.

											
										
										
											2022-10-29 12:48:35 +02:00
+								        // Switch the tokenizer to the RCDATA state.
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								        parser->m_tokenizer.switch_to(HTMLTokenizer::State::RCDATA);
-												LibWeb: Annotate and simplify the HTML fragment parsing algorithm

This patch adds inline spec comments, and then adjusts the code a bit
so it reads more like the spec.

											
										
										
											2022-10-29 12:48:35 +02:00
+								    }
 								    // - style
 								    // - xmp
 								    // - iframe
 								    // - noembed
 								    // - noframes
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
+								    else if (context_element_is_html
 								        && context_element.local_name().is_one_of(HTML::TagNames::style, HTML::TagNames::xmp, HTML::TagNames::iframe, HTML::TagNames::noembed, HTML::TagNames::noframes)) {
-												LibWeb: Annotate and simplify the HTML fragment parsing algorithm

This patch adds inline spec comments, and then adjusts the code a bit
so it reads more like the spec.

											
										
										
											2022-10-29 12:48:35 +02:00
+								        // Switch the tokenizer to the RAWTEXT state.
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								        parser->m_tokenizer.switch_to(HTMLTokenizer::State::RAWTEXT);
-												LibWeb: Annotate and simplify the HTML fragment parsing algorithm

This patch adds inline spec comments, and then adjusts the code a bit
so it reads more like the spec.

											
										
										
											2022-10-29 12:48:35 +02:00
+								    }
 								    // - script
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
+								    else if (context_element_is_html && context_element.local_name().is_one_of(HTML::TagNames::script)) {
-												LibWeb: Annotate and simplify the HTML fragment parsing algorithm

This patch adds inline spec comments, and then adjusts the code a bit
so it reads more like the spec.

											
										
										
											2022-10-29 12:48:35 +02:00
+								        // Switch the tokenizer to the script data state.
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								        parser->m_tokenizer.switch_to(HTMLTokenizer::State::ScriptData);
-												LibWeb: Annotate and simplify the HTML fragment parsing algorithm

This patch adds inline spec comments, and then adjusts the code a bit
so it reads more like the spec.

											
										
										
											2022-10-29 12:48:35 +02:00
+								    }
 								    // - noscript
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
+								    else if (context_element_is_html && context_element.local_name().is_one_of(HTML::TagNames::noscript)) {
-												LibWeb: Set fragment scripting mode from the context document

This corresponds with the editorial change to the HTML standard
introducing the parsing mode enum of:

https://github.com/whatwg/html/commit/01c45cede

And a follow up normative change of:

https://github.com/whatwg/html/commit/508706c80

Making fragment parsing derive its scripting mode from the context
document.

											
										
										
											2026-04-12 13:31:30 +02:00
+								        // If scripting mode is not Disabled, switch the tokenizer to the RAWTEXT state. Otherwise, leave the tokenizer in the data state.
 								        if (scripting_mode != HTML::ParserScriptingMode::Disabled)
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								            parser->m_tokenizer.switch_to(HTMLTokenizer::State::RAWTEXT);
-												LibWeb: Annotate and simplify the HTML fragment parsing algorithm

This patch adds inline spec comments, and then adjusts the code a bit
so it reads more like the spec.

											
										
										
											2022-10-29 12:48:35 +02:00
+								    }
 								    // - plaintext
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
+								    else if (context_element_is_html && context_element.local_name().is_one_of(HTML::TagNames::plaintext)) {
-												LibWeb: Annotate and simplify the HTML fragment parsing algorithm

This patch adds inline spec comments, and then adjusts the code a bit
so it reads more like the spec.

											
										
										
											2022-10-29 12:48:35 +02:00
+								        // Switch the tokenizer to the PLAINTEXT state.
-												LibWeb: Remove the C++ HTML tree builder

Delete the old C++ tree-construction implementation and helper classes
that became unused once the Rust parser is unconditional. Remove the C++
stack of open elements, active formatting elements, speculative mock
element, and tree-builder-only token storage.

Keep the C++ parser entry points that still own LibWeb DOM integration,
encoding detection, tokenizer bridging, incremental parsing, and the
speculative parser support used by resource discovery.

											
										
										
											2026-05-16 13:47:49 +02:00
+								        parser->m_tokenizer.switch_to(HTMLTokenizer::State::PLAINTEXT);
-												LibWeb: Implement fragment parsing and use it for Element.innerHTML

This patch implements most of the HTML fragment parsing algorithm and
ports Element::set_inner_html() to it. This was the last remaining user
of the old HTML parser. :^)

											
										
										
											2020-06-25 23:42:08 +02:00
+								    }
-												LibWeb: Annotate and simplify the HTML fragment parsing algorithm

This patch adds inline spec comments, and then adjusts the code a bit
so it reads more like the spec.

											
										
										
											2022-10-29 12:48:35 +02:00
+								    // Any other element
 								    else {
 								        // Leave the tokenizer in the data state.
 								    }
-												LibWeb: Implement fragment parsing and use it for Element.innerHTML

This patch implements most of the HTML fragment parsing algorithm and
ports Element::set_inner_html() to it. This was the last remaining user
of the old HTML parser. :^)

											
										
										
											2020-06-25 23:42:08 +02:00
-												LibWeb: Set fragment scripting mode from the context document

This corresponds with the editorial change to the HTML standard
introducing the parsing mode enum of:

https://github.com/whatwg/html/commit/01c45cede

And a follow up normative change of:

https://github.com/whatwg/html/commit/508706c80

Making fragment parsing derive its scripting mode from the context
document.

											
										
										
											2026-04-12 13:31:30 +02:00
+								    // 11. Let root be the result of creating an element given document, "html", the HTML namespace, null, null, false,
-												LibWeb: Implement scoped custom element registries

											
										
										
											2026-02-27 17:05:47 +00:00
+								    //    and context's custom element registry.
 								    auto root = MUST(create_element(context_element.document(), HTML::TagNames::html, Namespace::HTML, {}, {}, false, context_element.custom_element_registry()));
-												LibWeb: Annotate and simplify the HTML fragment parsing algorithm

This patch adds inline spec comments, and then adjusts the code a bit
so it reads more like the spec.

											
										
										
											2022-10-29 12:48:35 +02:00
-												LibWeb: Set fragment scripting mode from the context document

This corresponds with the editorial change to the HTML standard
introducing the parsing mode enum of:

https://github.com/whatwg/html/commit/01c45cede

And a follow up normative change of:

https://github.com/whatwg/html/commit/508706c80

Making fragment parsing derive its scripting mode from the context
document.

											
										
										
											2026-04-12 13:31:30 +02:00
+								    // 12. Append root to document.
-												LibWeb: Handle currently ignored `WebIDL::ExceptionOr<T>`s

											
										
										
											2022-10-30 17:50:04 +00:00
+								    MUST(temp_document->append_child(root));
-												LibWeb: Annotate and simplify the HTML fragment parsing algorithm

This patch adds inline spec comments, and then adjusts the code a bit
so it reads more like the spec.

											
										
										
											2022-10-29 12:48:35 +02:00
-												LibWeb: Set fragment scripting mode from the context document

This corresponds with the editorial change to the HTML standard
introducing the parsing mode enum of:

https://github.com/whatwg/html/commit/01c45cede

And a follow up normative change of:

https://github.com/whatwg/html/commit/508706c80

Making fragment parsing derive its scripting mode from the context
document.

											
										
										
											2026-04-12 13:31:30 +02:00
+								    // 17. Set the HTML parser's form element pointer to the nearest node to context that is a form element
-												LibWeb: Annotate and simplify the HTML fragment parsing algorithm

This patch adds inline spec comments, and then adjusts the code a bit
so it reads more like the spec.

											
										
										
											2022-10-29 12:48:35 +02:00
+								    //     (going straight up the ancestor chain, and including the element itself, if it is a form element), if any.
 								    //     (If there is no such form element, the form element pointer keeps its initial value, null.)
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
+								    parser->m_form_element = as_if<HTMLFormElement>(context_element);
 								    if (!parser->m_form_element)
 								        parser->m_form_element = context_element.first_ancestor_of_type<HTMLFormElement>();
-												LibWeb: Implement fragment parsing and use it for Element.innerHTML

This patch implements most of the HTML fragment parsing algorithm and
ports Element::set_inner_html() to it. This was the last remaining user
of the old HTML parser. :^)

											
										
										
											2020-06-25 23:42:08 +02:00
-												LibWeb: Make the Rust HTML parser unconditional

Remove the runtime selector between the old C++ tree builder and the new
Rust implementation. Always construct HTML documents and fragments with
the Rust parser now that it matches the existing tests.

Simplify dump-html-tree by dropping the backend option that only made
sense while both parser implementations were available.

											
										
										
											2026-05-16 13:35:37 +02:00
+								    auto context_local_name = context_element.local_name().bytes_as_string_view();
 								    auto context_namespace = context_element.namespace_uri();
 								    auto context_namespace_ffi = namespace_to_html_parser_ffi(context_namespace);
 								    StringView context_namespace_uri;
 								    if (context_namespace_ffi == RustFfiHtmlNamespace::Other && context_namespace.has_value())
 								        context_namespace_uri = context_namespace->bytes_as_string_view();
 								    Vector<RustFfiHtmlParserAttribute> context_attributes;
 								    if (auto attributes = context_element.attributes()) {
 								        context_attributes.ensure_capacity(attributes->length());
 								        for (size_t i = 0; i < attributes->length(); ++i) {
 								            auto const* attribute = attributes->item(i);
 								            auto local_name = attribute->local_name().bytes_as_string_view();
 								            auto value = attribute->value().bytes_as_string_view();
 								            auto prefix = attribute->prefix().map([](auto const& prefix) { return prefix.bytes_as_string_view(); });
 								            context_attributes.unchecked_append({
 								                reinterpret_cast<u8 const*>(local_name.characters_without_null_termination()),
 								                local_name.length(),
 								                prefix.has_value() ? reinterpret_cast<u8 const*>(prefix->characters_without_null_termination()) : nullptr,
 								                prefix.has_value() ? prefix->length() : 0,
 								                attribute_namespace_to_html_parser_ffi(attribute->namespace_uri()),
 								                reinterpret_cast<u8 const*>(value.characters_without_null_termination()),
 								                value.length(),
 								            });
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
+								        }
-												LibWeb: Wire Rust parser scripts and fragments

Preserve Rust parser state across tokenizer runs and stop cleanly when
a parser-blocking script has to execute. Thread the pending script back
through the existing C++ parser entry point so document.write(), input
insertion points, and script bookkeeping continue to use the normal
LibWeb machinery.

Add the fragment parser setup needed by innerHTML and contextual
fragment parsing, including context elements, form ownership, tokenizer
state selection, text coalescing, and foreign-content integration.

											
										
										
											2026-05-15 20:57:22 +02:00
+								    }
-												LibWeb: Make the Rust HTML parser unconditional

Remove the runtime selector between the old C++ tree builder and the new
Rust implementation. Always construct HTML documents and fragments with
the Rust parser now that it matches the existing tests.

Simplify dump-html-tree by dropping the backend option that only made
sense while both parser implementations were available.

											
										
										
											2026-05-16 13:35:37 +02:00
+								    rust_html_parser_begin_fragment(
 								        parser->m_rust_parser,
 								        reinterpret_cast<size_t>(root.ptr()),
 								        reinterpret_cast<size_t>(&context_element),
 								        context_namespace_ffi,
 								        reinterpret_cast<u8 const*>(context_namespace_uri.characters_without_null_termination()),
 								        context_namespace_uri.length(),
 								        reinterpret_cast<u8 const*>(context_local_name.characters_without_null_termination()),
 								        context_local_name.length(),
 								        context_attributes.data(),
 								        context_attributes.size(),
 								        quirks_mode_to_html_parser_ffi(temp_document->mode()),
 								        parser->m_form_element ? reinterpret_cast<size_t>(parser->m_form_element.ptr()) : 0);
-												LibWeb: Wire Rust parser scripts and fragments

Preserve Rust parser state across tokenizer runs and stop cleanly when
a parser-blocking script has to execute. Thread the pending script back
through the existing C++ parser entry point so document.write(), input
insertion points, and script bookkeeping continue to use the normal
LibWeb machinery.

Add the fragment parser setup needed by innerHTML and contextual
fragment parsing, including context elements, form ownership, tokenizer
state selection, text coalescing, and foreign-content integration.

											
										
										
											2026-05-15 20:57:22 +02:00
-												LibWeb: Set fragment scripting mode from the context document

This corresponds with the editorial change to the HTML standard
introducing the parsing mode enum of:

https://github.com/whatwg/html/commit/01c45cede

And a follow up normative change of:

https://github.com/whatwg/html/commit/508706c80

Making fragment parsing derive its scripting mode from the context
document.

											
										
										
											2026-04-12 13:31:30 +02:00
+								    // 18. Place the input into the input stream for the HTML parser just created. The encoding confidence is irrelevant.
 								    // 19. Start the HTML parser and let it run until it has consumed all the characters just inserted into the input stream.
-												LibWeb: Make document.write() work while document is parsing

This necessitated making HTMLParser ref-counted, and having it register
itself with Document when created. That makes it possible for scripts to
add new input at the current parser insertion point.

There is now a reference cycle between Document and HTMLParser. This
cycle is explicitly broken by calling Document::detach_parser() at the
end of HTMLParser::run().

This is a huge progression on ACID3, from 31% to 49%! :^)

											
										
										
											2022-02-21 21:54:21 +01:00
+								    parser->run(context_element.document().url());
-												LibWeb: Implement fragment parsing and use it for Element.innerHTML

This patch implements most of the HTML fragment parsing algorithm and
ports Element::set_inner_html() to it. This was the last remaining user
of the old HTML parser. :^)

											
										
										
											2020-06-25 23:42:08 +02:00
-												LibWeb: Set fragment scripting mode from the context document

This corresponds with the editorial change to the HTML standard
introducing the parsing mode enum of:

https://github.com/whatwg/html/commit/01c45cede

And a follow up normative change of:

https://github.com/whatwg/html/commit/508706c80

Making fragment parsing derive its scripting mode from the context
document.

											
										
										
											2026-04-12 13:31:30 +02:00
+								    // 20. Return root's children, in tree order.
-												LibGC+Everywhere: Factor out a LibGC from LibJS

Resulting in a massive rename across almost everywhere! Alongside the
namespace change, we now have the following names:

 * JS::NonnullGCPtr -> GC::Ref
 * JS::GCPtr -> GC::Ptr
 * JS::HeapFunction -> GC::Function
 * JS::CellImpl -> GC::Cell
 * JS::Handle -> GC::Root

											
										
										
											2024-11-15 04:01:23 +13:00
+								    Vector<GC::Root<DOM::Node>> children;
 								    while (GC::Ptr<DOM::Node> child = root->first_child()) {
-												LibWeb: Handle currently ignored `WebIDL::ExceptionOr<T>`s

											
										
										
											2022-10-30 17:50:04 +00:00
+								        MUST(root->remove_child(*child));
-												LibWeb: Implement fragment parsing and use it for Element.innerHTML

This patch implements most of the HTML fragment parsing algorithm and
ports Element::set_inner_html() to it. This was the last remaining user
of the old HTML parser. :^)

											
										
										
											2020-06-25 23:42:08 +02:00
+								        context_element.document().adopt_node(*child);
-												LibGC+Everywhere: Factor out a LibGC from LibJS

Resulting in a massive rename across almost everywhere! Alongside the
namespace change, we now have the following names:

 * JS::NonnullGCPtr -> GC::Ref
 * JS::GCPtr -> GC::Ptr
 * JS::HeapFunction -> GC::Function
 * JS::CellImpl -> GC::Cell
 * JS::Handle -> GC::Root

											
										
										
											2024-11-15 04:01:23 +13:00
+								        children.append(GC::make_root(*child));
-												LibWeb: Implement fragment parsing and use it for Element.innerHTML

This patch implements most of the HTML fragment parsing algorithm and
ports Element::set_inner_html() to it. This was the last remaining user
of the old HTML parser. :^)

											
										
										
											2020-06-25 23:42:08 +02:00
+								    }
 								    return children;
 								}
-												LibWeb: Implement encoding sniffing algorithm

This patch implements the HTML specification's "encoding sniffing
algorithm", which is used when no encoding can be obtained from the
Content-Type header (either because it doesn't contain a charset=...
value, or because the file has not been opened via HTTP, as with local files).

It also modifies the creator of the HTMLDocumentParser to use the new
HTMLDocumentParser::create_with_uncertain_encoding static method, which
runs the encoding sniffing algorithm before instantiating the parser.

This now allows us to load local HTML pages (or remote pages without a
charset specified in the 'Content-Type' header) with a non-UTF-8
encoding such as 'windows-1252'. This would previously crash the
browser. :^)

											
										
										
											2021-05-12 10:47:12 +02:00
-												LibGC+Everywhere: Factor out a LibGC from LibJS

Resulting in a massive rename across almost everywhere! Alongside the
namespace change, we now have the following names:

 * JS::NonnullGCPtr -> GC::Ref
 * JS::GCPtr -> GC::Ptr
 * JS::HeapFunction -> GC::Function
 * JS::CellImpl -> GC::Cell
 * JS::Handle -> GC::Root

											
										
										
											2024-11-15 04:01:23 +13:00
+								GC::Ref<HTMLParser> HTMLParser::create_for_scripting(DOM::Document& document)
-												LibWeb: Make document.write() work while document is parsing

This necessitated making HTMLParser ref-counted, and having it register
itself with Document when created. That makes it possible for scripts to
add new input at the current parser insertion point.

There is now a reference cycle between Document and HTMLParser. This
cycle is explicitly broken by calling Document::detach_parser() at the
end of HTMLParser::run().

This is a huge progression on ACID3, from 31% to 49%! :^)

											
										
										
											2022-02-21 21:54:21 +01:00
+								{
-												LibWeb: Set fragment scripting mode from the context document

This corresponds with the editorial change to the HTML standard
introducing the parsing mode enum of:

https://github.com/whatwg/html/commit/01c45cede

And a follow up normative change of:

https://github.com/whatwg/html/commit/508706c80

Making fragment parsing derive its scripting mode from the context
document.

											
										
										
											2026-04-12 13:31:30 +02:00
 								    // Factory for a parser that is flagged as script-created. The scripting mode is
 								    // Normal when the document has scripting enabled and Disabled otherwise.
+								    auto scripting_mode = document.is_scripting_enabled() ? ParserScriptingMode::Normal : ParserScriptingMode::Disabled;
-												LibWeb: Make the Rust HTML parser unconditional

Remove the runtime selector between the old C++ tree builder and the new
Rust implementation. Always construct HTML documents and fragments with
the Rust parser now that it matches the existing tests.

Simplify dump-html-tree by dropping the backend option that only made
sense while both parser implementations were available.

											
										
										
											2026-05-16 13:35:37 +02:00
 								    // Construct the HTMLParser through the document's realm, passing
 								    // ScriptCreatedParser::Yes so the parser is flagged as script-created.
+								    return document.realm().create<HTMLParser>(document, scripting_mode, ScriptCreatedParser::Yes);
-												LibWeb: Track whether HTMLParser is script-created

Add a ScriptCreatedParser flag plumbed through HTMLParser's constructor
and create_for_scripting(). Only document.open()'s parser sets it to
Yes. Document::close() step 3 now checks is_script_created() so it
correctly skips parsers that weren't created via document.open(),
matching the spec.

Previously the check was just `if (!m_parser)`, which incorrectly let
document.close() insert an EOF into a network-driven parser. The bug
was mostly latent because the network parser used to finish quickly,
but it matters once the network parser stays alive for the duration of
a streamed parse.

											
										
										
											2026-04-28 19:47:49 +02:00
+								}
 								GC::Ref<HTMLParser> HTMLParser::create_with_open_input_stream(DOM::Document& document)
 								{
 								    // Same scripting-mode selection as create_for_scripting(): Normal when the
 								    // document has scripting enabled, Disabled otherwise.
 								    auto scripting_mode = document.is_scripting_enabled() ? ParserScriptingMode::Normal : ParserScriptingMode::Disabled;
-												LibWeb: Make the Rust HTML parser unconditional

Remove the runtime selector between the old C++ tree builder and the new
Rust implementation. Always construct HTML documents and fragments with
the Rust parser now that it matches the existing tests.

Simplify dump-html-tree by dropping the backend option that only made
sense while both parser implementations were available.

											
										
										
											2026-05-16 13:35:37 +02:00
 								    // Unlike create_for_scripting(), this parser is constructed with
 								    // ScriptCreatedParser::No, i.e. it is not flagged as script-created.
+								    return document.realm().create<HTMLParser>(document, scripting_mode, ScriptCreatedParser::No);
-												LibWeb: Make document.write() work while document is parsing

This necessitated making HTMLParser ref-counted, and having it register
itself with Document when created. That makes it possible for scripts to
add new input at the current parser insertion point.

There is now a reference cycle between Document and HTMLParser. This
cycle is explicitly broken by calling Document::detach_parser() at the
end of HTMLParser::run().

This is a huge progression on ACID3, from 31% to 49%! :^)

											
										
										
											2022-02-21 21:54:21 +01:00
+								}
-												LibGC+Everywhere: Factor out a LibGC from LibJS

Resulting in a massive rename across almost everywhere! Alongside the
namespace change, we now have the following names:

 * JS::NonnullGCPtr -> GC::Ref
 * JS::GCPtr -> GC::Ptr
 * JS::HeapFunction -> GC::Function
 * JS::CellImpl -> GC::Cell
 * JS::Handle -> GC::Root

											
										
										
											2024-11-15 04:01:23 +13:00
+								GC::Ref<HTMLParser> HTMLParser::create_with_uncertain_encoding(DOM::Document& document, ByteBuffer const& input, Optional<MimeSniff::MimeType> maybe_mime_type)
-												LibWeb: Implement encoding sniffing algorithm

This patch implements the HTML specification's "encoding sniffing
algorithm", which is used when no encoding can be obtained from the
Content-Type header (either because it doesn't contain a charset=...)
value or the file has not been opened via HTTP (as with local files).

It also modifies the creator of the HTMLDocumentParser to use the new
HTMLDocumentParser::create_with_uncertain_encoding static method, which
runs the encoding sniffing algorithm before instantiating the parser.

This now allows us to load local HTML pages (or remote pages without a
charset specified in the 'Content-Type' header) with a non-UTF-8
encoding such as 'windows-1252'. This would previously crash the
browser. :^)

											
										
										
											2021-05-12 10:47:12 +02:00
+								{
-												LibWeb: Set fragment scripting mode from the context document

This corresponds with the editorial change to the HTML standard
introducing the parsing mode enum of:

https://github.com/whatwg/html/commit/01c45cede

And a follow up normative change of:

https://github.com/whatwg/html/commit/508706c80

Making fragment parsing derive its scripting mode from the context
document.

											
										
										
											2026-04-12 13:31:30 +02:00
+								    auto scripting_mode = document.is_scripting_enabled() ? ParserScriptingMode::Normal : ParserScriptingMode::Disabled;
-												LibWeb: Implement encoding sniffing algorithm

This patch implements the HTML specification's "encoding sniffing
algorithm", which is used when no encoding can be obtained from the
Content-Type header (either because it doesn't contain a charset=...)
value or the file has not been opened via HTTP (as with local files).

It also modifies the creator of the HTMLDocumentParser to use the new
HTMLDocumentParser::create_with_uncertain_encoding static method, which
runs the encoding sniffing algorithm before instantiating the parser.

This now allows us to load local HTML pages (or remote pages without a
charset specified in the 'Content-Type' header) with a non-UTF-8
encoding such as 'windows-1252'. This would previously crash the
browser. :^)

											
										
										
											2021-05-12 10:47:12 +02:00
+								    if (document.has_encoding())
-												LibWeb: Make the Rust HTML parser unconditional

Remove the runtime selector between the old C++ tree builder and the new
Rust implementation. Always construct HTML documents and fragments with
the Rust parser now that it matches the existing tests.

Simplify dump-html-tree by dropping the backend option that only made
sense while both parser implementations were available.

											
										
										
											2026-05-16 13:35:37 +02:00
+								        return document.realm().create<HTMLParser>(document, scripting_mode, input, document.encoding().value().to_byte_string());
-												LibWeb: Use Content-Type header to set document encoding

Co-authored-by: Shannon Booth <shannon@serenityos.org>

											
										
										
											2024-10-20 19:39:50 +11:00
+								    auto encoding = run_encoding_sniffing_algorithm(document, input, maybe_mime_type);
-												LibWeb: Put HTML parser encoding sniffing debug logging behind a flag

											
										
										
											2022-10-08 13:10:01 +02:00
+								    dbgln_if(HTML_PARSER_DEBUG, "The encoding sniffing algorithm returned encoding '{}'", encoding);
-												LibWeb: Make the Rust HTML parser unconditional

Remove the runtime selector between the old C++ tree builder and the new
Rust implementation. Always construct HTML documents and fragments with
the Rust parser now that it matches the existing tests.

Simplify dump-html-tree by dropping the backend option that only made
sense while both parser implementations were available.

											
										
										
											2026-05-16 13:35:37 +02:00
+								    return document.realm().create<HTMLParser>(document, scripting_mode, input, encoding);
-												LibWeb: Make document.write() work while document is parsing

This necessitated making HTMLParser ref-counted, and having it register
itself with Document when created. That makes it possible for scripts to
add new input at the current parser insertion point.

There is now a reference cycle between Document and HTMLParser. This
cycle is explicitly broken by calling Document::detach_parser() at the
end of HTMLParser::run().

This is a huge progression on ACID3, from 31% to 49%! :^)

											
										
										
											2022-02-21 21:54:21 +01:00
+								}
-												LibWeb: Make the Rust HTML parser unconditional

Remove the runtime selector between the old C++ tree builder and the new
Rust implementation. Always construct HTML documents and fragments with
the Rust parser now that it matches the existing tests.

Simplify dump-html-tree by dropping the backend option that only made
sense while both parser implementations were available.

											
										
										
											2026-05-16 13:35:37 +02:00
+								GC::Ref<HTMLParser> HTMLParser::create(DOM::Document& document, StringView input, ParserScriptingMode scripting_mode, StringView encoding)
-												LibWeb: Make document.write() work while document is parsing

This necessitated making HTMLParser ref-counted, and having it register
itself with Document when created. That makes it possible for scripts to
add new input at the current parser insertion point.

There is now a reference cycle between Document and HTMLParser. This
cycle is explicitly broken by calling Document::detach_parser() at the
end of HTMLParser::run().

This is a huge progression on ACID3, from 31% to 49%! :^)

											
										
										
											2022-02-21 21:54:21 +01:00
+								{
-												LibWeb: Make the Rust HTML parser unconditional

Remove the runtime selector between the old C++ tree builder and the new
Rust implementation. Always construct HTML documents and fragments with
the Rust parser now that it matches the existing tests.

Simplify dump-html-tree by dropping the backend option that only made
sense while both parser implementations were available.

											
										
										
											2026-05-16 13:35:37 +02:00
+								    return document.realm().create<HTMLParser>(document, scripting_mode, input, encoding);
-												LibWeb: Implement encoding sniffing algorithm

This patch implements the HTML specification's "encoding sniffing
algorithm", which is used when no encoding can be obtained from the
Content-Type header (either because it doesn't contain a charset=...)
value or the file has not been opened via HTTP (as with local files).

It also modifies the creator of the HTMLDocumentParser to use the new
HTMLDocumentParser::create_with_uncertain_encoding static method, which
runs the encoding sniffing algorithm before instantiating the parser.

This now allows us to load local HTML pages (or remote pages without a
charset specified in the 'Content-Type' header) with a non-UTF-8
encoding such as 'windows-1252'. This would previously crash the
browser. :^)

											
										
										
											2021-05-12 10:47:12 +02:00
+								}
-												LibWeb: Factor out attribute serialization into a separate function

											
										
										
											2024-04-09 14:46:21 +02:00
// Selects whether escape_string() runs in the "attribute mode" of the HTML
// spec's string escaping algorithm.
enum class AttributeMode {
    // Not in attribute mode: U+0022 QUOTATION MARK is left untouched.
    No,
    // Attribute mode: U+0022 QUOTATION MARK is additionally replaced by "&quot;".
    Yes,
};
-												LibWeb+LibUnicode+WebContent: Port DOM:CharacterData to UTF-16

This replaces the underlying storage of CharacterData with Utf16String
and deals with the fallout.

											
										
										
											2025-07-24 12:05:52 -04:00
+								template<OneOf<Utf8View, Utf16View> ViewType>
 								static String escape_string(ViewType const& string, AttributeMode attribute_mode)
-												LibWeb: Factor out attribute serialization into a separate function

											
										
										
											2024-04-09 14:46:21 +02:00
+								{
 								    // https://html.spec.whatwg.org/multipage/parsing.html#escapingString
 								    StringBuilder builder;
-												LibWeb+LibUnicode+WebContent: Port DOM:CharacterData to UTF-16

This replaces the underlying storage of CharacterData with Utf16String
and deals with the fallout.

											
										
										
											2025-07-24 12:05:52 -04:00
+								    for (auto code_point : string) {
-												LibWeb: Factor out attribute serialization into a separate function

											
										
										
											2024-04-09 14:46:21 +02:00
+								        // 1. Replace any occurrence of the "&" character by the string "&amp;".
 								        if (code_point == '&')
 								            builder.append("&amp;"sv);
 								        // 2. Replace any occurrences of the U+00A0 NO-BREAK SPACE character by the string "&nbsp;".
 								        else if (code_point == 0xA0)
 								            builder.append("&nbsp;"sv);
-												LibWeb: Escape "<" and ">" when serializing attribute values

See https://github.com/whatwg/html/pull/6362

											
										
										
											2025-05-22 14:53:53 +10:00
+								        // 3. Replace any occurrences of the "<" character by the string "&lt;".
 								        else if (code_point == '<')
-												LibWeb: Factor out attribute serialization into a separate function

											
										
										
											2024-04-09 14:46:21 +02:00
+								            builder.append("&lt;"sv);
-												LibWeb: Escape "<" and ">" when serializing attribute values

See https://github.com/whatwg/html/pull/6362

											
										
										
											2025-05-22 14:53:53 +10:00
+								        // 4. Replace any occurrences of the ">" character by the string "&gt;".
 								        else if (code_point == '>')
-												LibWeb: Factor out attribute serialization into a separate function

											
										
										
											2024-04-09 14:46:21 +02:00
+								            builder.append("&gt;"sv);
-												LibWeb: Escape "<" and ">" when serializing attribute values

See https://github.com/whatwg/html/pull/6362

											
										
										
											2025-05-22 14:53:53 +10:00
+								        // 5. If the algorithm was invoked in the attribute mode, then replace any occurrences of the """ character by the string "&quot;".
 								        else if (code_point == '"' && attribute_mode == AttributeMode::Yes)
 								            builder.append("&quot;"sv);
-												LibWeb: Factor out attribute serialization into a separate function

											
										
										
											2024-04-09 14:46:21 +02:00
+								        else
 								            builder.append_code_point(code_point);
 								    }
 								    return builder.to_string_without_validation();
 								}
-												LibWeb: Implement HTML fragment serialisation and use it in innerHTML

The previous implementation was about a half implementation and was
tied to Element::innerHTML. This separates it and puts it into
HTMLDocumentParser, as this is in the parsing section of the spec.

This provides a near finished HTML fragment serialisation algorithm,
bar namespaces in attributes and the `is` value.

											
										
										
											2021-09-13 22:42:15 +01:00
+								// https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-serialisation-algorithm
-												LibGC+Everywhere: Factor out a LibGC from LibJS

Resulting in a massive rename across almost everywhere! Alongside the
namespace change, we now have the following names:

 * JS::NonnullGCPtr -> GC::Ref
 * JS::GCPtr -> GC::Ptr
 * JS::HeapFunction -> GC::Function
 * JS::CellImpl -> GC::Cell
 * JS::Handle -> GC::Root

											
										
										
											2024-11-15 04:01:23 +13:00
+								String HTMLParser::serialize_html_fragment(DOM::Node const& node, SerializableShadowRoots serializable_shadow_roots, Vector<GC::Root<DOM::ShadowRoot>> const& shadow_roots, DOM::FragmentSerializationMode fragment_serialization_mode)
-												LibWeb: Implement HTML fragment serialisation and use it in innerHTML

The previous implementation was about a half implementation and was
tied to Element::innerHTML. This separates it and puts it into
HTMLDocumentParser, as this is in the parsing section of the spec.

This provides a near finished HTML fragment serialisation algorithm,
bar namespaces in attributes and the `is` value.

											
										
										
											2021-09-13 22:42:15 +01:00
+								{
-												LibWeb: Implement Element.outerHTML

This piggybacks on the same fragment serialization code that innerHTML
uses, but instead of constructing an imaginary parent element like the
spec asks us to, we just add a separate serialization mode that includes
the context element in the serialized markup.

This makes the image carousel on https://utah.edu/ show up :^)

											
										
										
											2024-04-09 14:44:58 +02:00
+								    // NOTE: Steps in this function are jumbled a bit to accommodate the Element.outerHTML API.
 								    //       When called with FragmentSerializationMode::Outer, we will serialize the element itself,
 								    //       not just its children.
 								    // 2. Let s be a string, and initialize it to the empty string.
 								    StringBuilder builder;
 								    auto serialize_element = [&](DOM::Element const& element) {
-												LibWeb: Update HTML fragment serialization for declarative shadow DOM

											
										
										
											2024-06-25 10:49:54 +02:00
+								        // If current node is an element in the HTML namespace, the MathML namespace, or the SVG namespace, then let tagname be current node's local name.
 								        // Otherwise, let tagname be current node's qualified name.
-												LibWeb: Implement Element.outerHTML

This piggybacks on the same fragment serialization code that innerHTML
uses, but instead of constructing an imaginary parent element like the
spec asks us to, we just add a separate serialization mode that includes
the context element in the serialized markup.

This makes the image carousel on https://utah.edu/ show up :^)

											
										
										
											2024-04-09 14:44:58 +02:00
+								        FlyString tag_name;
 								        if (element.namespace_uri().has_value() && element.namespace_uri()->is_one_of(Namespace::HTML, Namespace::MathML, Namespace::SVG))
 								            tag_name = element.local_name();
 								        else
 								            tag_name = element.qualified_name();
-												LibWeb: Update HTML fragment serialization for declarative shadow DOM

											
										
										
											2024-06-25 10:49:54 +02:00
+								        // Append a U+003C LESS-THAN SIGN character (<), followed by tagname.
-												LibWeb: Implement Element.outerHTML

This piggybacks on the same fragment serialization code that innerHTML
uses, but instead of constructing an imaginary parent element like the
spec asks us to, we just add a separate serialization mode that includes
the context element in the serialized markup.

This makes the image carousel on https://utah.edu/ show up :^)

											
										
										
											2024-04-09 14:44:58 +02:00
+								        builder.append('<');
 								        builder.append(tag_name);
-												LibWeb: Update HTML fragment serialization for declarative shadow DOM

											
										
										
											2024-06-25 10:49:54 +02:00
+								        // If current node's is value is not null, and the element does not have an is attribute in its attribute list,
 								        // then append the string " is="",
 								        // followed by current node's is value escaped as described below in attribute mode,
 								        // followed by a U+0022 QUOTATION MARK character (").
-												LibWeb: Implement Element.outerHTML

This piggybacks on the same fragment serialization code that innerHTML
uses, but instead of constructing an imaginary parent element like the
spec asks us to, we just add a separate serialization mode that includes
the context element in the serialized markup.

This makes the image carousel on https://utah.edu/ show up :^)

											
										
										
											2024-04-09 14:44:58 +02:00
+								        if (element.is_value().has_value() && !element.has_attribute(AttributeNames::is)) {
 								            builder.append(" is=\""sv);
-												LibWeb+LibUnicode+WebContent: Port DOM:CharacterData to UTF-16

This replaces the underlying storage of CharacterData with Utf16String
and deals with the fallout.

											
										
										
											2025-07-24 12:05:52 -04:00
+								            builder.append(escape_string(element.is_value().value().code_points(), AttributeMode::Yes));
-												LibWeb: Implement Element.outerHTML

This piggybacks on the same fragment serialization code that innerHTML
uses, but instead of constructing an imaginary parent element like the
spec asks us to, we just add a separate serialization mode that includes
the context element in the serialized markup.

This makes the image carousel on https://utah.edu/ show up :^)

											
										
										
											2024-04-09 14:44:58 +02:00
+								            builder.append('"');
 								        }
-												LibWeb: Update HTML fragment serialization for declarative shadow DOM

											
										
										
											2024-06-25 10:49:54 +02:00
+								        // For each attribute that the element has,
 								        // append a U+0020 SPACE character,
 								        // the attribute's serialized name as described below,
 								        // a U+003D EQUALS SIGN character (=),
 								        // a U+0022 QUOTATION MARK character ("),
 								        // the attribute's value, escaped as described below in attribute mode,
 								        // and a second U+0022 QUOTATION MARK character (").
-												LibWeb: Implement Element.outerHTML

This piggybacks on the same fragment serialization code that innerHTML
uses, but instead of constructing an imaginary parent element like the
spec asks us to, we just add a separate serialization mode that includes
the context element in the serialized markup.

This makes the image carousel on https://utah.edu/ show up :^)

											
										
										
											2024-04-09 14:44:58 +02:00
+								        element.for_each_attribute([&](auto const& attribute) {
 								            builder.append(' ');
 								            // An attribute's serialized name for the purposes of the previous paragraph must be determined as follows:
 								            // -> If the attribute has no namespace:
-												LibWeb: Serialize HTML attribute names as per spec

											
										
										
											2025-09-13 22:55:08 +02:00
+								            if (!attribute.namespace_uri().has_value()) {
 								                // The attribute's serialized name is the attribute's local name.
 								                builder.append(attribute.local_name());
 								            }
-												LibWeb: Implement Element.outerHTML

This piggybacks on the same fragment serialization code that innerHTML
uses, but instead of constructing an imaginary parent element like the
spec asks us to, we just add a separate serialization mode that includes
the context element in the serialized markup.

This makes the image carousel on https://utah.edu/ show up :^)

											
										
										
											2024-04-09 14:44:58 +02:00
+								            // -> If the attribute is in the XML namespace:
-												LibWeb: Serialize HTML attribute names as per spec

											
										
										
											2025-09-13 22:55:08 +02:00
+								            else if (attribute.namespace_uri() == Namespace::XML) {
 								                // The attribute's serialized name is the string "xml:" followed by the attribute's local name.
 								                builder.append("xml:"sv);
 								                builder.append(attribute.local_name());
 								            }
-												LibWeb: Implement Element.outerHTML

This piggybacks on the same fragment serialization code that innerHTML
uses, but instead of constructing an imaginary parent element like the
spec asks us to, we just add a separate serialization mode that includes
the context element in the serialized markup.

This makes the image carousel on https://utah.edu/ show up :^)

											
										
										
											2024-04-09 14:44:58 +02:00
+								            // -> If the attribute is in the XMLNS namespace and the attribute's local name is xmlns:
-												LibWeb: Serialize HTML attribute names as per spec

											
										
										
											2025-09-13 22:55:08 +02:00
+								            else if (attribute.namespace_uri() == Namespace::XMLNS && attribute.local_name() == "xmlns") {
 								                // The attribute's serialized name is the string "xmlns".
 								                builder.append("xmlns"sv);
 								            }
-												LibWeb: Implement Element.outerHTML

This piggybacks on the same fragment serialization code that innerHTML
uses, but instead of constructing an imaginary parent element like the
spec asks us to, we just add a separate serialization mode that includes
the context element in the serialized markup.

This makes the image carousel on https://utah.edu/ show up :^)

											
										
										
											2024-04-09 14:44:58 +02:00
+								            // -> If the attribute is in the XMLNS namespace and the attribute's local name is not xmlns:
-												LibWeb: Serialize HTML attribute names as per spec

											
										
										
											2025-09-13 22:55:08 +02:00
+								            else if (attribute.namespace_uri() == Namespace::XMLNS) {
 								                // The attribute's serialized name is the string "xmlns:" followed by the attribute's local name.
 								                builder.append("xmlns:"sv);
 								                builder.append(attribute.local_name());
 								            }
-												LibWeb: Implement Element.outerHTML

This piggybacks on the same fragment serialization code that innerHTML
uses, but instead of constructing an imaginary parent element like the
spec asks us to, we just add a separate serialization mode that includes
the context element in the serialized markup.

This makes the image carousel on https://utah.edu/ show up :^)

											
										
										
											2024-04-09 14:44:58 +02:00
+								            // -> If the attribute is in the XLink namespace:
-												LibWeb: Serialize HTML attribute names as per spec

											
										
										
											2025-09-13 22:55:08 +02:00
+								            else if (attribute.namespace_uri() == Namespace::XLink) {
 								                // The attribute's serialized name is the string "xlink:" followed by the attribute's local name.
 								                builder.append("xlink:"sv);
 								                builder.append(attribute.local_name());
 								            }
-												LibWeb: Implement Element.outerHTML

This piggybacks on the same fragment serialization code that innerHTML
uses, but instead of constructing an imaginary parent element like the
spec asks us to, we just add a separate serialization mode that includes
the context element in the serialized markup.

This makes the image carousel on https://utah.edu/ show up :^)

											
										
										
											2024-04-09 14:44:58 +02:00
+								            // -> If the attribute is in some other namespace:
-												LibWeb: Serialize HTML attribute names as per spec

											
										
										
											2025-09-13 22:55:08 +02:00
+								            else {
 								                // The attribute's serialized name is the attribute's qualified name.
 								                builder.append(attribute.name());
 								            }
-												LibWeb: Implement Element.outerHTML

This piggybacks on the same fragment serialization code that innerHTML
uses, but instead of constructing an imaginary parent element like the
spec asks us to, we just add a separate serialization mode that includes
the context element in the serialized markup.

This makes the image carousel on https://utah.edu/ show up :^)

											
										
										
											2024-04-09 14:44:58 +02:00
 								            builder.append("=\""sv);
-												LibWeb+LibUnicode+WebContent: Port DOM:CharacterData to UTF-16

This replaces the underlying storage of CharacterData with Utf16String
and deals with the fallout.

											
										
										
											2025-07-24 12:05:52 -04:00
+								            builder.append(escape_string(attribute.value().code_points(), AttributeMode::Yes));
-												LibWeb: Implement Element.outerHTML

This piggybacks on the same fragment serialization code that innerHTML
uses, but instead of constructing an imaginary parent element like the
spec asks us to, we just add a separate serialization mode that includes
the context element in the serialized markup.

This makes the image carousel on https://utah.edu/ show up :^)

											
										
										
											2024-04-09 14:44:58 +02:00
+								            builder.append('"');
 								        });
-												LibWeb: Update HTML fragment serialization for declarative shadow DOM

											
										
										
											2024-06-25 10:49:54 +02:00
+								        // Append a U+003E GREATER-THAN SIGN character (>).
-												LibWeb: Implement Element.outerHTML

This piggybacks on the same fragment serialization code that innerHTML
uses, but instead of constructing an imaginary parent element like the
spec asks us to, we just add a separate serialization mode that includes
the context element in the serialized markup.

This makes the image carousel on https://utah.edu/ show up :^)

											
										
										
											2024-04-09 14:44:58 +02:00
+								        builder.append('>');
-												LibWeb: Update HTML fragment serialization for declarative shadow DOM

											
										
										
											2024-06-25 10:49:54 +02:00
+								        // If current node serializes as void, then continue on to the next child node at this point.
-												LibWeb: Implement Element.outerHTML

This piggybacks on the same fragment serialization code that innerHTML
uses, but instead of constructing an imaginary parent element like the
spec asks us to, we just add a separate serialization mode that includes
the context element in the serialized markup.

This makes the image carousel on https://utah.edu/ show up :^)

											
										
										
											2024-04-09 14:44:58 +02:00
+								        if (element.serializes_as_void())
 								            return IterationDecision::Continue;
-												LibWeb: Update HTML fragment serialization for declarative shadow DOM

											
										
										
											2024-06-25 10:49:54 +02:00
+								        // Append the value of running the HTML fragment serialization algorithm with current node,
 								        // serializableShadowRoots, and shadowRoots (thus recursing into this algorithm for that node),
 								        // followed by a U+003C LESS-THAN SIGN character (<),
 								        // a U+002F SOLIDUS character (/),
 								        // tagname again,
 								        // and finally a U+003E GREATER-THAN SIGN character (>).
 								        builder.append(serialize_html_fragment(element, serializable_shadow_roots, shadow_roots));
-												LibWeb: Implement Element.outerHTML

This piggybacks on the same fragment serialization code that innerHTML
uses, but instead of constructing an imaginary parent element like the
spec asks us to, we just add a separate serialization mode that includes
the context element in the serialized markup.

This makes the image carousel on https://utah.edu/ show up :^)

											
										
										
											2024-04-09 14:44:58 +02:00
+								        builder.append("</"sv);
 								        builder.append(tag_name);
 								        builder.append('>');
 								        return IterationDecision::Continue;
 								    };
 								    if (fragment_serialization_mode == DOM::FragmentSerializationMode::Outer) {
-												AK+Everywhere: Rename `verify_cast` to `as`

Follow-up to fc20e61e7249006247b43f84a3189d5a37fa103e.

											
										
										
											2025-01-21 09:12:05 -05:00
+								        serialize_element(as<DOM::Element>(node));
-												LibWeb: Implement Element.outerHTML

This piggybacks on the same fragment serialization code that innerHTML
uses, but instead of constructing an imaginary parent element like the
spec asks us to, we just add a separate serialization mode that includes
the context element in the serialized markup.

This makes the image carousel on https://utah.edu/ show up :^)

											
										
										
											2024-04-09 14:44:58 +02:00
+								        return builder.to_string_without_validation();
 								    }
-												LibWeb: Implement HTML fragment serialisation and use it in innerHTML

The previous implementation was about a half implementation and was
tied to Element::innerHTML. This separates it and puts it into
HTMLDocumentParser, as this is in the parsing section of the spec.

This provides a near finished HTML fragment serialisation algorithm,
bar namespaces in attributes and the `is` value.

											
										
										
											2021-09-13 22:42:15 +01:00
+								    // The algorithm takes as input a DOM Element, Document, or DocumentFragment referred to as the node.
 								    VERIFY(node.is_element() || node.is_document() || node.is_document_fragment());
-												LibGC+Everywhere: Factor out a LibGC from LibJS

Resulting in a massive rename across almost everywhere! Alongside the
namespace change, we now have the following names:

 * JS::NonnullGCPtr -> GC::Ref
 * JS::GCPtr -> GC::Ptr
 * JS::HeapFunction -> GC::Function
 * JS::CellImpl -> GC::Cell
 * JS::Handle -> GC::Root

											
										
										
											2024-11-15 04:01:23 +13:00
+								    GC::Ref<DOM::Node const> actual_node = node;
-												LibWeb: Implement HTML fragment serialisation and use it in innerHTML

The previous implementation was about a half implementation and was
tied to Element::innerHTML. This separates it and puts it into
HTMLDocumentParser, as this is in the parsing section of the spec.

This provides a near finished HTML fragment serialisation algorithm,
bar namespaces in attributes and the `is` value.

											
										
										
											2021-09-13 22:42:15 +01:00
 								    if (is<DOM::Element>(node)) {
-												AK+Everywhere: Rename `verify_cast` to `as`

Follow-up to fc20e61e7249006247b43f84a3189d5a37fa103e.

											
										
										
											2025-01-21 09:12:05 -05:00
+								        auto const& element = as<DOM::Element>(node);
-												LibWeb: Implement HTML fragment serialisation and use it in innerHTML

The previous implementation was about a half implementation and was
tied to Element::innerHTML. This separates it and puts it into
HTMLDocumentParser, as this is in the parsing section of the spec.

This provides a near finished HTML fragment serialisation algorithm,
bar namespaces in attributes and the `is` value.

											
										
										
											2021-09-13 22:42:15 +01:00
 								        // 1. If the node serializes as void, then return the empty string.
 								        //    (NOTE: serializes as void is defined only on elements in the spec)
 								        if (element.serializes_as_void())
-												LibWeb: Port HTMLParser::serialize_html_fragment from DeprecatedString

											
										
										
											2023-11-10 09:46:54 +13:00
+								            return String {};
-												LibWeb: Implement HTML fragment serialisation and use it in innerHTML

The previous implementation was about a half implementation and was
tied to Element::innerHTML. This separates it and puts it into
HTMLDocumentParser, as this is in the parsing section of the spec.

This provides a near finished HTML fragment serialisation algorithm,
bar namespaces in attributes and the `is` value.

											
										
										
											2021-09-13 22:42:15 +01:00
 								        // 3. If the node is a template element, then let the node instead be the template element's template contents (a DocumentFragment node).
 								        //    (NOTE: This is out of order of the spec to avoid another dynamic cast. The second step just creates a string builder, so it shouldn't matter)
 								        if (is<HTML::HTMLTemplateElement>(element))
-												AK+Everywhere: Rename `verify_cast` to `as`

Follow-up to fc20e61e7249006247b43f84a3189d5a37fa103e.

											
										
										
											2025-01-21 09:12:05 -05:00
+								            actual_node = as<HTML::HTMLTemplateElement>(element).content();
-												LibWeb: Update HTML fragment serialization for declarative shadow DOM

											
										
										
											2024-06-25 10:49:54 +02:00
 								        // 4. If current node is a shadow host, then:
 								        if (element.is_shadow_host()) {
 								            // 1. Let shadow be current node's shadow root.
 								            auto shadow = element.shadow_root();
 								            // 2. If one of the following is true:
 								            //    - serializableShadowRoots is true and shadow's serializable is true; or
 								            //    - shadowRoots contains shadow,
 								            if ((serializable_shadow_roots == SerializableShadowRoots::Yes && shadow->serializable())
-												AK+Everywhere: Add Vector::contains(predicate) and use it

No functional changes.

											
										
										
											2026-01-06 14:43:55 +01:00
+								                || shadow_roots.contains([&](auto& entry) { return entry == shadow; })) {
-												LibWeb: Update HTML fragment serialization for declarative shadow DOM

											
										
										
											2024-06-25 10:49:54 +02:00
+								                // then:
 								                // 1. Append "<template shadowrootmode="".
 								                builder.append("<template shadowrootmode=\""sv);
 								                // 2. If shadow's mode is "open", then append "open". Otherwise, append "closed".
 								                builder.append(shadow->mode() == Bindings::ShadowRootMode::Open ? "open"sv : "closed"sv);
 								                // 3. Append """.
 								                builder.append('"');
 								                // 4. If shadow's delegates focus is set, then append " shadowrootdelegatesfocus=""".
 								                if (shadow->delegates_focus())
 								                    builder.append(" shadowrootdelegatesfocus=\"\""sv);
 								                // 5. If shadow's serializable is set, then append " shadowrootserializable=""".
 								                if (shadow->serializable())
 								                    builder.append(" shadowrootserializable=\"\""sv);
-												LibWeb: Align declarative shadow root parsing

Teach the Rust parser to recognize declarative shadow root templates and
pass the parsed mode, slot assignment, clonable, serializable, and
focus-delegation flags to the C++ DOM host.

Expose shadowRootSlotAssignment reflection with the spec-defined named
missing and invalid value defaults, and extend the ShadowDOM text test
coverage for the reflected property and parser-created shadow roots.

											
										
										
											2026-05-16 09:05:40 +02:00
+								                // 6. If shadow's slot assignment is "manual", then append " shadowrootslotassignment="manual"".
 								                if (shadow->slot_assignment() == Bindings::SlotAssignmentMode::Manual)
 								                    builder.append(" shadowrootslotassignment=\"manual\""sv);
 								                // 7. If shadow's clonable is set, then append " shadowrootclonable=""".
-												LibWeb: Update HTML fragment serialization for declarative shadow DOM

											
										
										
											2024-06-25 10:49:54 +02:00
+								                if (shadow->clonable())
 								                    builder.append(" shadowrootclonable=\"\""sv);
-												LibWeb: Implement scoped custom element registries

											
										
										
											2026-02-27 17:05:47 +00:00
+								                // 7. Let shouldAppendRegistryAttribute be the result of running these steps:
 								                auto should_append_registry_attribute = [&] {
 								                    // 1. Let documentRegistry be shadow's node document's custom element registry.
 								                    auto document_registry = shadow->document().custom_element_registry();
 								                    // 2. Let shadowRegistry be shadow's custom element registry.
 								                    auto shadow_registry = shadow->custom_element_registry();
 								                    // 3. If documentRegistry is null and shadowRegistry is null, then return false.
 								                    if (!document_registry && !shadow_registry)
 								                        return false;
 								                    // 4. If documentRegistry is a global custom element registry and shadowRegistry is a global custom
 								                    //    element registry, then return false.
 								                    if (is_a_global_custom_element_registry(document_registry) && is_a_global_custom_element_registry(shadow_registry))
 								                        return false;
 								                    // 5. Return true.
 								                    return true;
 								                }();
 								                // 8. If shouldAppendRegistryAttribute is true, then append " shadowrootcustomelementregistry=""".
 								                if (should_append_registry_attribute)
 								                    builder.append(" shadowrootcustomelementregistry=\"\""sv);
 								                // 9. Append ">".
-												LibWeb: Update HTML fragment serialization for declarative shadow DOM

											
										
										
											2024-06-25 10:49:54 +02:00
+								                builder.append('>');
-												LibWeb: Implement scoped custom element registries

											
										
										
											2026-02-27 17:05:47 +00:00
+								                // 10. Append the value of running the HTML fragment serialization algorithm with shadow,
-												LibWeb: Update HTML fragment serialization for declarative shadow DOM

											
										
										
											2024-06-25 10:49:54 +02:00
+								                //    serializableShadowRoots, and shadowRoots (thus recursing into this algorithm for that element).
 								                builder.append(serialize_html_fragment(*shadow, serializable_shadow_roots, shadow_roots));
-												LibWeb: Implement scoped custom element registries

											
										
										
											2026-02-27 17:05:47 +00:00
+								                // 11. Append "</template>".
-												LibWeb: Update HTML fragment serialization for declarative shadow DOM

											
										
										
											2024-06-25 10:49:54 +02:00
+								                builder.append("</template>"sv);
 								            }
 								        }
-												LibWeb: Implement HTML fragment serialisation and use it in innerHTML

The previous implementation was about a half implementation and was
tied to Element::innerHTML. This separates it and puts it into
HTMLDocumentParser, as this is in the parsing section of the spec.

This provides a near finished HTML fragment serialisation algorithm,
bar namespaces in attributes and the `is` value.

											
										
										
											2021-09-13 22:42:15 +01:00
+								    }
-												LibWeb: Update HTML fragment serialization for declarative shadow DOM

											
										
										
											2024-06-25 10:49:54 +02:00
+								    // 5. For each child node of the node, in tree order, run the following steps:
-												LibWeb: Implement HTML fragment serialisation and use it in innerHTML

The previous implementation was about a half implementation and was
tied to Element::innerHTML. This separates it and puts it into
HTMLDocumentParser, as this is in the parsing section of the spec.

This provides a near finished HTML fragment serialisation algorithm,
bar namespaces in attributes and the `is` value.

											
										
										
											2021-09-13 22:42:15 +01:00
+								    actual_node->for_each_child([&](DOM::Node& current_node) {
 								        // 1. Let current node be the child node being processed.
 								        // 2. Append the appropriate string from the following list to s:
 								        if (is<DOM::Element>(current_node)) {
 								            // -> If current node is an Element
-												AK+Everywhere: Rename `verify_cast` to `as`

Follow-up to fc20e61e7249006247b43f84a3189d5a37fa103e.

											
										
										
											2025-01-21 09:12:05 -05:00
+								            auto& element = as<DOM::Element>(current_node);
-												LibWeb: Use `IterationDecision` in single level Node iteration methods

`Node::for_each_child()` and `Node::for_each_child_of_type()` callbacks
now return an `IterationDecision`, which allows us to break early if
required.

											
										
										
											2024-05-04 14:59:52 +01:00
+								            serialize_element(element);
 								            return IterationDecision::Continue;
-												LibWeb: Implement HTML fragment serialisation and use it in innerHTML

The previous implementation was about a half implementation and was
tied to Element::innerHTML. This separates it and puts it into
HTMLDocumentParser, as this is in the parsing section of the spec.

This provides a near finished HTML fragment serialisation algorithm,
bar namespaces in attributes and the `is` value.

											
										
										
											2021-09-13 22:42:15 +01:00
+								        }
 								        if (is<DOM::Text>(current_node)) {
 								            // -> If current node is a Text node
-												AK+Everywhere: Rename `verify_cast` to `as`

Follow-up to fc20e61e7249006247b43f84a3189d5a37fa103e.

											
										
										
											2025-01-21 09:12:05 -05:00
+								            auto& text_node = as<DOM::Text>(current_node);
-												LibWeb: Implement HTML fragment serialisation and use it in innerHTML

The previous implementation was about a half implementation and was
tied to Element::innerHTML. This separates it and puts it into
HTMLDocumentParser, as this is in the parsing section of the spec.

This provides a near finished HTML fragment serialisation algorithm,
bar namespaces in attributes and the `is` value.

											
										
										
											2021-09-13 22:42:15 +01:00
+								            auto* parent = current_node.parent();
 								            if (is<DOM::Element>(parent)) {
-												AK+Everywhere: Rename `verify_cast` to `as`

Follow-up to fc20e61e7249006247b43f84a3189d5a37fa103e.

											
										
										
											2025-01-21 09:12:05 -05:00
+								                auto& parent_element = as<DOM::Element>(*parent);
-												LibWeb: Implement HTML fragment serialisation and use it in innerHTML

The previous implementation was about a half implementation and was
tied to Element::innerHTML. This separates it and puts it into
HTMLDocumentParser, as this is in the parsing section of the spec.

This provides a near finished HTML fragment serialisation algorithm,
bar namespaces in attributes and the `is` value.

											
										
										
											2021-09-13 22:42:15 +01:00
-												LibWeb: Update HTML fragment serialization for declarative shadow DOM

											
										
										
											2024-06-25 10:49:54 +02:00
+								                // If the parent of current node is a style, script, xmp, iframe, noembed, noframes, or plaintext element,
 								                // or if the parent of current node is a noscript element and scripting is enabled for the node, then append the value of current node's data IDL attribute literally.
-												LibWeb: Implement HTML fragment serialisation and use it in innerHTML

The previous implementation was about a half implementation and was
tied to Element::innerHTML. This separates it and puts it into
HTMLDocumentParser, as this is in the parsing section of the spec.

This provides a near finished HTML fragment serialisation algorithm,
bar namespaces in attributes and the `is` value.

											
										
										
											2021-09-13 22:42:15 +01:00
+								                if (parent_element.local_name().is_one_of(HTML::TagNames::style, HTML::TagNames::script, HTML::TagNames::xmp, HTML::TagNames::iframe, HTML::TagNames::noembed, HTML::TagNames::noframes, HTML::TagNames::plaintext)
 								                    || (parent_element.local_name() == HTML::TagNames::noscript && !parent_element.is_scripting_disabled())) {
 								                    builder.append(text_node.data());
 								                    return IterationDecision::Continue;
 								                }
 								            }
-												LibWeb: Update HTML fragment serialization for declarative shadow DOM

											
										
										
											2024-06-25 10:49:54 +02:00
+								            // Otherwise, append the value of current node's data IDL attribute, escaped as described below.
-												LibWeb+LibUnicode+WebContent: Port DOM:CharacterData to UTF-16

This replaces the underlying storage of CharacterData with Utf16String
and deals with the fallout.

											
										
										
											2025-07-24 12:05:52 -04:00
+								            builder.append(escape_string(text_node.data().utf16_view(), AttributeMode::No));
-												LibWeb: Implement HTML fragment serialisation and use it in innerHTML

The previous implementation was about a half implementation and was
tied to Element::innerHTML. This separates it and puts it into
HTMLDocumentParser, as this is in the parsing section of the spec.

This provides a near finished HTML fragment serialisation algorithm,
bar namespaces in attributes and the `is` value.

											
										
										
											2021-09-13 22:42:15 +01:00
+								        }
 								        if (is<DOM::Comment>(current_node)) {
 								            // -> If current node is a Comment
-												AK+Everywhere: Rename `verify_cast` to `as`

Follow-up to fc20e61e7249006247b43f84a3189d5a37fa103e.

											
										
										
											2025-01-21 09:12:05 -05:00
+								            auto& comment_node = as<DOM::Comment>(current_node);
-												LibWeb: Implement HTML fragment serialisation and use it in innerHTML

The previous implementation was about a half implementation and was
tied to Element::innerHTML. This separates it and puts it into
HTMLDocumentParser, as this is in the parsing section of the spec.

This provides a near finished HTML fragment serialisation algorithm,
bar namespaces in attributes and the `is` value.

											
										
										
											2021-09-13 22:42:15 +01:00
-												LibWeb: Update HTML fragment serialization for declarative shadow DOM

											
										
										
											2024-06-25 10:49:54 +02:00
+								            // Append the literal string "<!--" (U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS, U+002D HYPHEN-MINUS),
 								            // followed by the value of current node's data IDL attribute, followed by the literal string "-->" (U+002D HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN).
-												Everywhere: Add sv suffix to strings relying on StringView(char const*)

Each of these strings would previously rely on StringView's char const*
constructor overload, which would call __builtin_strlen on the string.
Since we now have operator ""sv, we can replace these with much simpler
versions. This opens the door to being able to remove
StringView(char const*).

No functional changes.

											
										
										
											2022-07-11 17:32:29 +00:00
+								            builder.append("<!--"sv);
-												LibWeb: Implement HTML fragment serialisation and use it in innerHTML

The previous implementation was about a half implementation and was
tied to Element::innerHTML. This separates it and puts it into
HTMLDocumentParser, as this is in the parsing section of the spec.

This provides a near finished HTML fragment serialisation algorithm,
bar namespaces in attributes and the `is` value.

											
										
										
											2021-09-13 22:42:15 +01:00
+								            builder.append(comment_node.data());
-												Everywhere: Add sv suffix to strings relying on StringView(char const*)

Each of these strings would previously rely on StringView's char const*
constructor overload, which would call __builtin_strlen on the string.
Since we now have operator ""sv, we can replace these with much simpler
versions. This opens the door to being able to remove
StringView(char const*).

No functional changes.

											
										
										
											2022-07-11 17:32:29 +00:00
+								            builder.append("-->"sv);
-												LibWeb: Implement HTML fragment serialisation and use it in innerHTML

The previous implementation was about a half implementation and was
tied to Element::innerHTML. This separates it and puts it into
HTMLDocumentParser, as this is in the parsing section of the spec.

This provides a near finished HTML fragment serialisation algorithm,
bar namespaces in attributes and the `is` value.

											
										
										
											2021-09-13 22:42:15 +01:00
+								            return IterationDecision::Continue;
 								        }
 								        if (is<DOM::ProcessingInstruction>(current_node)) {
 								            // -> If current node is a ProcessingInstruction
-												AK+Everywhere: Rename `verify_cast` to `as`

Follow-up to fc20e61e7249006247b43f84a3189d5a37fa103e.

											
										
										
											2025-01-21 09:12:05 -05:00
+								            auto& processing_instruction_node = as<DOM::ProcessingInstruction>(current_node);
-												LibWeb: Implement HTML fragment serialisation and use it in innerHTML

The previous implementation was about a half implementation and was
tied to Element::innerHTML. This separates it and puts it into
HTMLDocumentParser, as this is in the parsing section of the spec.

This provides a near finished HTML fragment serialisation algorithm,
bar namespaces in attributes and the `is` value.

											
										
										
											2021-09-13 22:42:15 +01:00
-												LibWeb: Update HTML fragment serialization for declarative shadow DOM

											
										
										
											2024-06-25 10:49:54 +02:00
+								            // Append the literal string "<?" (U+003C LESS-THAN SIGN, U+003F QUESTION MARK), followed by the value of current node's target IDL attribute,
 								            // followed by a single U+0020 SPACE character, followed by the value of current node's data IDL attribute, followed by a single U+003E GREATER-THAN SIGN character (>).
-												Everywhere: Add sv suffix to strings relying on StringView(char const*)

Each of these strings would previously rely on StringView's char const*
constructor overload, which would call __builtin_strlen on the string.
Since we now have operator ""sv, we can replace these with much simpler
versions. This opens the door to being able to remove
StringView(char const*).

No functional changes.

											
										
										
											2022-07-11 17:32:29 +00:00
+								            builder.append("<?"sv);
-												LibWeb: Implement HTML fragment serialisation and use it in innerHTML

The previous implementation was about a half implementation and was
tied to Element::innerHTML. This separates it and puts it into
HTMLDocumentParser, as this is in the parsing section of the spec.

This provides a near finished HTML fragment serialisation algorithm,
bar namespaces in attributes and the `is` value.

											
										
										
											2021-09-13 22:42:15 +01:00
+								            builder.append(processing_instruction_node.target());
 								            builder.append(' ');
 								            builder.append(processing_instruction_node.data());
 								            builder.append('>');
 								            return IterationDecision::Continue;
 								        }
 								        if (is<DOM::DocumentType>(current_node)) {
 								            // -> If current node is a DocumentType
-												AK+Everywhere: Rename `verify_cast` to `as`

Follow-up to fc20e61e7249006247b43f84a3189d5a37fa103e.

											
										
										
											2025-01-21 09:12:05 -05:00
+								            auto& document_type_node = as<DOM::DocumentType>(current_node);
-												LibWeb: Implement HTML fragment serialisation and use it in innerHTML

The previous implementation was about a half implementation and was
tied to Element::innerHTML. This separates it and puts it into
HTMLDocumentParser, as this is in the parsing section of the spec.

This provides a near finished HTML fragment serialisation algorithm,
bar namespaces in attributes and the `is` value.

											
										
										
											2021-09-13 22:42:15 +01:00
-												LibWeb: Update HTML fragment serialization for declarative shadow DOM

											
										
										
											2024-06-25 10:49:54 +02:00
+								            // Append the literal string "<!DOCTYPE" (U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+0044 LATIN CAPITAL LETTER D, U+004F LATIN CAPITAL LETTER O,
 								            // U+0043 LATIN CAPITAL LETTER C, U+0054 LATIN CAPITAL LETTER T, U+0059 LATIN CAPITAL LETTER Y, U+0050 LATIN CAPITAL LETTER P, U+0045 LATIN CAPITAL LETTER E),
 								            // followed by a space (U+0020 SPACE), followed by the value of current node's name IDL attribute, followed by the literal string ">" (U+003E GREATER-THAN SIGN).
-												Everywhere: Add sv suffix to strings relying on StringView(char const*)

Each of these strings would previously rely on StringView's char const*
constructor overload, which would call __builtin_strlen on the string.
Since we now have operator ""sv, we can replace these with much simpler
versions. This opens the door to being able to remove
StringView(char const*).

No functional changes.

											
										
										
											2022-07-11 17:32:29 +00:00
+								            builder.append("<!DOCTYPE "sv);
-												LibWeb: Implement HTML fragment serialisation and use it in innerHTML

The previous implementation was about a half implementation and was
tied to Element::innerHTML. This separates it and puts it into
HTMLDocumentParser, as this is in the parsing section of the spec.

This provides a near finished HTML fragment serialisation algorithm,
bar namespaces in attributes and the `is` value.

											
										
										
											2021-09-13 22:42:15 +01:00
+								            builder.append(document_type_node.name());
 								            builder.append('>');
 								            return IterationDecision::Continue;
 								        }
 								        return IterationDecision::Continue;
 								    });
-												LibWeb: Update HTML fragment serialization for declarative shadow DOM

											
										
										
											2024-06-25 10:49:54 +02:00
+								    // 6. Return s.
-												LibWeb: Port HTMLParser::serialize_html_fragment from DeprecatedString

											
										
										
											2023-11-10 09:46:54 +13:00
+								    return MUST(builder.to_string());
-												LibWeb: Implement HTML fragment serialisation and use it in innerHTML

The previous implementation was about a half implementation and was
tied to Element::innerHTML. This separates it and puts it into
HTMLDocumentParser, as this is in the parsing section of the spec.

This provides a near finished HTML fragment serialisation algorithm,
bar namespaces in attributes and the `is` value.

											
										
										
											2021-09-13 22:42:15 +01:00
+								}
-												LibWeb: Move HTML dimension value parsing from CSS to HTML namespace

These are part of HTML, not CSS, so let's not confuse things.

											
										
										
											2022-03-26 14:29:52 +01:00
+								// https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#current-dimension-value
-												LibWeb: Move and rename CSSStyleValue to StyleValues/StyleValue.{h,cpp}

This reverts 0e3487b9ab455a7648185995363bb3b487551d40.

Back when I made that change, I thought we could make our StyleValue
classes match the typed-om definitions directly. However, they have
different requirements. Typed-om types need to be mutable and GCed,
whereas StyleValues are immutable and ideally wouldn't require a JS VM.

While I was already making such a cataclysmic change, I've moved it into
the StyleValues directory, because it *not* being there has bothered me
for a long time. 😅

											
										
										
											2025-08-08 10:11:51 +01:00
+								static RefPtr<CSS::StyleValue const> parse_current_dimension_value(float value, Utf8View input, Utf8View::Iterator position)
-												LibWeb: Move HTML dimension value parsing from CSS to HTML namespace

These are part of HTML, not CSS, so let's not confuse things.

											
										
										
											2022-03-26 14:29:52 +01:00
+								{
 								    // 1. If position is past the end of input, then return value as a length.
 								    if (position == input.end())
-												LibWeb: Add CSSPixels::nearest_value_for(FloatingPoint)

This is intended to annotate conversions from unknown floating-point
values to CSSPixels, and make it more obvious the fp value will be
rounded to the nearest fixed-point value.

											
										
										
											2023-08-26 15:57:31 +01:00
+								        return CSS::LengthStyleValue::create(CSS::Length::make_px(CSSPixels::nearest_value_for(value)));
-												LibWeb: Move HTML dimension value parsing from CSS to HTML namespace

These are part of HTML, not CSS, so let's not confuse things.

											
										
										
											2022-03-26 14:29:52 +01:00
 								    // 2. If the code point at position within input is U+0025 (%), then return value as a percentage.
 								    if (*position == '%')
-												LibWeb: Make StyleValue constructors infallible

											
										
										
											2023-08-19 14:00:10 +01:00
+								        return CSS::PercentageStyleValue::create(CSS::Percentage(value));
-												LibWeb: Move HTML dimension value parsing from CSS to HTML namespace

These are part of HTML, not CSS, so let's not confuse things.

											
										
										
											2022-03-26 14:29:52 +01:00
 								    // 3. Return value as a length.
-												LibWeb: Add CSSPixels::nearest_value_for(FloatingPoint)

This is intended to annotate conversions from unknown floating-point
values to CSSPixels, and make it more obvious the fp value will be
rounded to the nearest fixed-point value.

											
										
										
											2023-08-26 15:57:31 +01:00
+								    return CSS::LengthStyleValue::create(CSS::Length::make_px(CSSPixels::nearest_value_for(value)));
-												LibWeb: Move HTML dimension value parsing from CSS to HTML namespace

These are part of HTML, not CSS, so let's not confuse things.

											
										
										
											2022-03-26 14:29:52 +01:00
+								}
 								// https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#rules-for-parsing-dimension-values
-												LibWeb: Move and rename CSSStyleValue to StyleValues/StyleValue.{h,cpp}

This reverts 0e3487b9ab455a7648185995363bb3b487551d40.

Back when I made that change, I thought we could make our StyleValue
classes match the typed-om definitions directly. However, they have
different requirements. Typed-om types need to be mutable and GCed,
whereas StyleValues are immutable and ideally wouldn't require a JS VM.

While I was already making such a cataclysmic change, I've moved it into
the StyleValues directory, because it *not* being there has bothered me
for a long time. 😅

											
										
										
											2025-08-08 10:11:51 +01:00
+								RefPtr<CSS::StyleValue const> parse_dimension_value(StringView string)
-												LibWeb: Move HTML dimension value parsing from CSS to HTML namespace

These are part of HTML, not CSS, so let's not confuse things.

											
										
										
											2022-03-26 14:29:52 +01:00
+								{
 								    // 1. Let input be the string being parsed.
 								    auto input = Utf8View(string);
 								    if (!input.validate())
 								        return nullptr;
 								    // 2. Let position be a position variable for input, initially pointing at the start of input.
 								    auto position = input.begin();
 								    // 3. Skip ASCII whitespace within input given position.
-												LibWeb: Replace incorrect uses of AK::is_ascii_space()

											
										
										
											2022-10-01 18:14:32 +01:00
+								    while (position != input.end() && Infra::is_ascii_whitespace(*position))
-												LibWeb: Move HTML dimension value parsing from CSS to HTML namespace

These are part of HTML, not CSS, so let's not confuse things.

											
										
										
											2022-03-26 14:29:52 +01:00
+								        ++position;
 								    // 4. If position is past the end of input or the code point at position within input is not an ASCII digit,
 								    //    then return failure.
 								    if (position == input.end() || !is_ascii_digit(*position))
 								        return nullptr;
 								    // 5. Collect a sequence of code points that are ASCII digits from input given position,
 								    //    and interpret the resulting sequence as a base-ten integer. Let value be that number.
 								    StringBuilder number_string;
 								    while (position != input.end() && is_ascii_digit(*position)) {
 								        number_string.append(*position);
 								        ++position;
 								    }
-												LibWeb: Cap HTML dimension values at 17895700 (same as Firefox)

Instead of allowing arbitrarily large values (which could eventually
overflow an i32), let's just cap them at the same limit as Firefox does.

Found by Domato.

											
										
										
											2024-07-19 16:15:07 +02:00
+								    auto integer_value = number_string.string_view().to_number<double>();
-												LibWeb: Clamp layout content sizes to a max value instead of crashing

We've historically asserted that no "saturated" size values end up as
final metrics for boxes in layout. This always had a chance of producing
false positives, since you can trivially create extremely large boxes
with CSS.

The reason we had those assertions was to catch bugs in our own engine
code where we'd incorrectly end up with non-finite values in layout
algorithms. At this point, we've found and fixed all known bugs of that
nature, and what remains are a bunch of false positives on pages that
create very large scrollable areas, iframes etc.

So, let's change it! We now clamp content width and height of boxes to
17895700 pixels, apparently the same cap as Firefox uses.

There's also the issue of calc() being able to produce non-finite
values. Note that we don't clamp the result of calc() directly, but
instead just clamp values when assigning them to content sizes.

Fixes #645.
Fixes #1236.
Fixes #1249.
Fixes #1908.
Fixes #3057.

											
										
										
											2025-02-05 14:13:33 +01:00
+								    float value = min(*integer_value, CSSPixels::max_dimension_value);
-												LibWeb: Move HTML dimension value parsing from CSS to HTML namespace

These are part of HTML, not CSS, so let's not confuse things.

											
										
										
											2022-03-26 14:29:52 +01:00
 								    // 6. If position is past the end of input, then return value as a length.
 								    if (position == input.end())
-												LibWeb: Cap HTML dimension values at 17895700 (same as Firefox)

Instead of allowing arbitrarily large values (which could eventually
overflow an i32), let's just cap them at the same limit as Firefox does.

Found by Domato.

											
										
										
											2024-07-19 16:15:07 +02:00
+								        return CSS::LengthStyleValue::create(CSS::Length::make_px(CSSPixels(value)));
-												LibWeb: Move HTML dimension value parsing from CSS to HTML namespace

These are part of HTML, not CSS, so let's not confuse things.

											
										
										
											2022-03-26 14:29:52 +01:00
 								    // 7. If the code point at position within input is U+002E (.), then:
 								    if (*position == '.') {
 								        // 1. Advance position by 1.
 								        ++position;
 								        // 2. If position is past the end of input or the code point at position within input is not an ASCII digit,
 								        //    then return the current dimension value with value, input, and position.
 								        if (position == input.end() || !is_ascii_digit(*position))
 								            return parse_current_dimension_value(value, input, position);
 								        // 3. Let divisor have the value 1.
 								        float divisor = 1;
 								        // 4. While true:
 								        while (true) {
 								            // 1. Multiply divisor by ten.
 								            divisor *= 10;
 								            // 2. Add the value of the code point at position within input,
 								            //    interpreted as a base-ten digit (0..9) and divided by divisor, to value.
 								            value += (*position - '0') / divisor;
 								            // 3. Advance position by 1.
 								            ++position;
 								            // 4. If position is past the end of input, then return value as a length.
 								            if (position == input.end())
-												LibWeb: Add CSSPixels::nearest_value_for(FloatingPoint)

This is intended to annotate conversions from unknown floating-point
values to CSSPixels, and make it more obvious the fp value will be
rounded to the nearest fixed-point value.

											
										
										
											2023-08-26 15:57:31 +01:00
+								                return CSS::LengthStyleValue::create(CSS::Length::make_px(CSSPixels::nearest_value_for(value)));
-												LibWeb: Move HTML dimension value parsing from CSS to HTML namespace

These are part of HTML, not CSS, so let's not confuse things.

											
										
										
											2022-03-26 14:29:52 +01:00
 								            // 5. If the code point at position within input is not an ASCII digit, then break.
 								            if (!is_ascii_digit(*position))
 								                break;
 								        }
 								    }
 								    // 8. Return the current dimension value with value, input, and position.
 								    return parse_current_dimension_value(value, input, position);
 								}
 								// https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#rules-for-parsing-non-zero-dimension-values
-												LibWeb: Move and rename CSSStyleValue to StyleValues/StyleValue.{h,cpp}

This reverts 0e3487b9ab455a7648185995363bb3b487551d40.

Back when I made that change, I thought we could make our StyleValue
classes match the typed-om definitions directly. However, they have
different requirements. Typed-om types need to be mutable and GCed,
whereas StyleValues are immutable and ideally wouldn't require a JS VM.

While I was already making such a cataclysmic change, I've moved it into
the StyleValues directory, because it *not* being there has bothered me
for a long time. 😅

											
										
										
											2025-08-08 10:11:51 +01:00
+								RefPtr<CSS::StyleValue const> parse_nonzero_dimension_value(StringView string)
-												LibWeb: Move HTML dimension value parsing from CSS to HTML namespace

These are part of HTML, not CSS, so let's not confuse things.

											
										
										
											2022-03-26 14:29:52 +01:00
+								{
 								    // 1. Let input be the string being parsed.
 								    // 2. Let value be the result of parsing input using the rules for parsing dimension values.
 								    auto value = parse_dimension_value(string);
 								    // 3. If value is an error, return an error.
 								    if (!value)
 								        return nullptr;
 								    // 4. If value is zero, return an error.
-												LibWeb/CSS: Replace CSSUnitValue with DimensionStyleValue

CSSUnitValue is a typed-om type which we will implement separately in
the future. However, it still seems useful to give our dimension values
a base class. (Maybe they could be templated in the future?) So instead
of deleting it entirely, rename it to DimensionStyleValue and make its
API match our style better.

											
										
										
											2025-08-08 11:08:54 +01:00
+								    if (value->is_length() && value->as_length().raw_value() == 0)
-												LibWeb: Move HTML dimension value parsing from CSS to HTML namespace

These are part of HTML, not CSS, so let's not confuse things.

											
										
										
											2022-03-26 14:29:52 +01:00
+								        return nullptr;
 								    if (value->is_percentage() && value->as_percentage().percentage().value() == 0)
 								        return nullptr;
 								    // 5. If value is a percentage, return value as a percentage.
 								    // 6. Return value as a length.
 								    return value;
 								}
-												LibWeb: Add Web::HTML::parse_legacy_color_value

This function follows the "rules for parsing a legacy color value"
which is used in some legacy attributes, such as 'bgcolor' in the body
element.

											
										
										
											2023-05-28 15:04:40 +12:00
+								// https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#rules-for-parsing-a-legacy-colour-value
-												LibWeb/HTML: Update legacy color parsing steps to match spec

											
										
										
											2024-10-31 12:30:04 +00:00
+								Optional<Color> parse_legacy_color_value(StringView string_view)
-												LibWeb: Add Web::HTML::parse_legacy_color_value

This function follows the "rules for parsing a legacy color value"
which is used in some legacy attributes, such as 'bgcolor' in the body
element.

											
										
										
											2023-05-28 15:04:40 +12:00
+								{
-												LibWeb/HTML: Update legacy color parsing steps to match spec

											
										
										
											2024-10-31 12:30:04 +00:00
+								    // 1. If input is the empty string, then return failure.
 								    if (string_view.is_empty())
-												LibWeb: Add Web::HTML::parse_legacy_color_value

This function follows the "rules for parsing a legacy color value"
which is used in some legacy attributes, such as 'bgcolor' in the body
element.

											
										
										
											2023-05-28 15:04:40 +12:00
+								        return {};
-												LibWeb/HTML: Update legacy color parsing steps to match spec

											
										
										
											2024-10-31 12:30:04 +00:00
+								    ByteString input = string_view;
 								    // 2. Strip leading and trailing ASCII whitespace from input.
-												LibWeb: Add Web::HTML::parse_legacy_color_value

This function follows the "rules for parsing a legacy color value"
which is used in some legacy attributes, such as 'bgcolor' in the body
element.

											
										
										
											2023-05-28 15:04:40 +12:00
+								    input = input.trim(Infra::ASCII_WHITESPACE);
-												LibWeb/HTML: Update legacy color parsing steps to match spec

											
										
										
											2024-10-31 12:30:04 +00:00
+								    // 3. If input is an ASCII case-insensitive match for "transparent", then return failure.
-												LibWeb: Prefer using equals_ignoring_ascii_case

Which has an optmization if both size of the string being passed
through are FlyStrings, which actually ends up being the case
in some places during selector matching comparing attribute names.
Instead of maintaining more overloads of
Infra::is_ascii_case_insensitive_match, switch
everything over to equals_ignoring_ascii_case instead.

											
										
										
											2025-05-18 15:04:56 +12:00
+								    if (input.equals_ignoring_ascii_case("transparent"sv))
-												LibWeb: Add Web::HTML::parse_legacy_color_value

This function follows the "rules for parsing a legacy color value"
which is used in some legacy attributes, such as 'bgcolor' in the body
element.

											
										
										
											2023-05-28 15:04:40 +12:00
+								        return {};
-												LibWeb/HTML: Update legacy color parsing steps to match spec

											
										
										
											2024-10-31 12:30:04 +00:00
+								    // 4. If input is an ASCII case-insensitive match for one of the named colors, then return the CSS color corresponding to that keyword. [CSSCOLOR]
-												LibWeb: Add Web::HTML::parse_legacy_color_value

This function follows the "rules for parsing a legacy color value"
which is used in some legacy attributes, such as 'bgcolor' in the body
element.

											
										
										
											2023-05-28 15:04:40 +12:00
+								    if (auto const color = Color::from_named_css_color_string(input); color.has_value())
 								        return color;
 								    auto hex_nibble_to_u8 = [](char nibble) -> u8 {
 								        if (nibble >= '0' && nibble <= '9')
 								            return nibble - '0';
 								        if (nibble >= 'a' && nibble <= 'f')
 								            return nibble - 'a' + 10;
 								        return nibble - 'A' + 10;
 								    };
-												LibWeb/HTML: Update legacy color parsing steps to match spec

											
										
										
											2024-10-31 12:30:04 +00:00
+								    // 5. If input's code point length is four, and the first character in input is U+0023 (#), and the last three characters of input are all ASCII hex digits, then:
-												LibWeb: Add Web::HTML::parse_legacy_color_value

This function follows the "rules for parsing a legacy color value"
which is used in some legacy attributes, such as 'bgcolor' in the body
element.

											
										
										
											2023-05-28 15:04:40 +12:00
+								    if (input.length() == 4 && input[0] == '#' && is_ascii_hex_digit(input[1]) && is_ascii_hex_digit(input[2]) && is_ascii_hex_digit(input[3])) {
-												LibWeb/HTML: Update legacy color parsing steps to match spec

											
										
										
											2024-10-31 12:30:04 +00:00
+								        // 1. Let result be a CSS color.
-												LibWeb: Add Web::HTML::parse_legacy_color_value

This function follows the "rules for parsing a legacy color value"
which is used in some legacy attributes, such as 'bgcolor' in the body
element.

											
										
										
											2023-05-28 15:04:40 +12:00
+								        Color result;
 								        result.set_alpha(0xFF);
 								        // 2. Interpret the second character of input as a hexadecimal digit; let the red component of result be the resulting number multiplied by 17.
 								        result.set_red(hex_nibble_to_u8(input[1]) * 17);
 								        // 3. Interpret the third character of input as a hexadecimal digit; let the green component of result be the resulting number multiplied by 17.
 								        result.set_green(hex_nibble_to_u8(input[2]) * 17);
 								        // 4. Interpret the fourth character of input as a hexadecimal digit; let the blue component of result be the resulting number multiplied by 17.
 								        result.set_blue(hex_nibble_to_u8(input[3]) * 17);
 								        // 5. Return result.
 								        return result;
 								    }
-												LibWeb/HTML: Update legacy color parsing steps to match spec

											
										
										
											2024-10-31 12:30:04 +00:00
+								    // 6. Replace any code points greater than U+FFFF in input (i.e., any characters that are not in the basic multilingual plane) with "00".
-												Everywhere: Rename {Deprecated => Byte}String

This commit un-deprecates DeprecatedString, and repurposes it as a byte
string.
As the null state has already been removed, there are no other
particularly hairy blockers in repurposing this type as a byte string
(what it _really_ is).

This commit is auto-generated:
  $ xs=$(ack -l \bDeprecatedString\b\|deprecated_string AK Userland \
    Meta Ports Ladybird Tests Kernel)
  $ perl -pie 's/\bDeprecatedString\b/ByteString/g;
    s/deprecated_string/byte_string/g' $xs
  $ clang-format --style=file -i \
    $(git diff --name-only | grep \.cpp\|\.h)
  $ gn format $(git ls-files '*.gn' '*.gni')

											
										
										
											2023-12-16 17:49:34 +03:30
+								    auto replace_non_basic_multilingual_code_points = [](StringView string) -> ByteString {
-												LibWeb: Add Web::HTML::parse_legacy_color_value

This function follows the "rules for parsing a legacy color value"
which is used in some legacy attributes, such as 'bgcolor' in the body
element.

											
										
										
											2023-05-28 15:04:40 +12:00
+								        StringBuilder builder;
 								        for (auto code_point : Utf8View { string }) {
 								            if (code_point > 0xFFFF)
 								                builder.append("00"sv);
 								            else
 								                builder.append_code_point(code_point);
 								        }
-												Everywhere: Rename {Deprecated => Byte}String

This commit un-deprecates DeprecatedString, and repurposes it as a byte
string.
As the null state has already been removed, there are no other
particularly hairy blockers in repurposing this type as a byte string
(what it _really_ is).

This commit is auto-generated:
  $ xs=$(ack -l \bDeprecatedString\b\|deprecated_string AK Userland \
    Meta Ports Ladybird Tests Kernel)
  $ perl -pie 's/\bDeprecatedString\b/ByteString/g;
    s/deprecated_string/byte_string/g' $xs
  $ clang-format --style=file -i \
    $(git diff --name-only | grep \.cpp\|\.h)
  $ gn format $(git ls-files '*.gn' '*.gni')

											
										
										
											2023-12-16 17:49:34 +03:30
+								        return builder.to_byte_string();
-												LibWeb: Add Web::HTML::parse_legacy_color_value

This function follows the "rules for parsing a legacy color value"
which is used in some legacy attributes, such as 'bgcolor' in the body
element.

											
										
										
											2023-05-28 15:04:40 +12:00
+								    };
 								    input = replace_non_basic_multilingual_code_points(input);
-												LibWeb/HTML: Update legacy color parsing steps to match spec

											
										
										
											2024-10-31 12:30:04 +00:00
+								    // 7. If input's code point length is greater than 128, truncate input, leaving only the first 128 characters.
-												LibWeb: Add Web::HTML::parse_legacy_color_value

This function follows the "rules for parsing a legacy color value"
which is used in some legacy attributes, such as 'bgcolor' in the body
element.

											
										
										
											2023-05-28 15:04:40 +12:00
+								    if (input.length() > 128)
 								        input = input.substring(0, 128);
-												LibWeb/HTML: Update legacy color parsing steps to match spec

											
										
										
											2024-10-31 12:30:04 +00:00
+								    // 8. If the first character in input is U+0023 (#), then remove it.
-												LibWeb: Add Web::HTML::parse_legacy_color_value

This function follows the "rules for parsing a legacy color value"
which is used in some legacy attributes, such as 'bgcolor' in the body
element.

											
										
										
											2023-05-28 15:04:40 +12:00
+								    if (input[0] == '#')
 								        input = input.substring(1);
-												LibWeb/HTML: Update legacy color parsing steps to match spec

											
										
										
											2024-10-31 12:30:04 +00:00
+								    // 9. Replace any character in input that is not an ASCII hex digit with U+0030 (0).
-												Everywhere: Rename {Deprecated => Byte}String

This commit un-deprecates DeprecatedString, and repurposes it as a byte
string.
As the null state has already been removed, there are no other
particularly hairy blockers in repurposing this type as a byte string
(what it _really_ is).

This commit is auto-generated:
  $ xs=$(ack -l \bDeprecatedString\b\|deprecated_string AK Userland \
    Meta Ports Ladybird Tests Kernel)
  $ perl -pie 's/\bDeprecatedString\b/ByteString/g;
    s/deprecated_string/byte_string/g' $xs
  $ clang-format --style=file -i \
    $(git diff --name-only | grep \.cpp\|\.h)
  $ gn format $(git ls-files '*.gn' '*.gni')

											
										
										
											2023-12-16 17:49:34 +03:30
+								    auto replace_non_ascii_hex = [](StringView string) -> ByteString {
-												LibWeb: Add Web::HTML::parse_legacy_color_value

This function follows the "rules for parsing a legacy color value"
which is used in some legacy attributes, such as 'bgcolor' in the body
element.

											
										
										
											2023-05-28 15:04:40 +12:00
+								        StringBuilder builder;
 								        for (auto code_point : Utf8View { string }) {
 								            if (is_ascii_hex_digit(code_point))
 								                builder.append_code_point(code_point);
 								            else
 								                builder.append_code_point('0');
 								        }
-												Everywhere: Rename {Deprecated => Byte}String

This commit un-deprecates DeprecatedString, and repurposes it as a byte
string.
As the null state has already been removed, there are no other
particularly hairy blockers in repurposing this type as a byte string
(what it _really_ is).

This commit is auto-generated:
  $ xs=$(ack -l \bDeprecatedString\b\|deprecated_string AK Userland \
    Meta Ports Ladybird Tests Kernel)
  $ perl -pie 's/\bDeprecatedString\b/ByteString/g;
    s/deprecated_string/byte_string/g' $xs
  $ clang-format --style=file -i \
    $(git diff --name-only | grep \.cpp\|\.h)
  $ gn format $(git ls-files '*.gn' '*.gni')

											
										
										
											2023-12-16 17:49:34 +03:30
+								        return builder.to_byte_string();
-												LibWeb: Add Web::HTML::parse_legacy_color_value

This function follows the "rules for parsing a legacy color value"
which is used in some legacy attributes, such as 'bgcolor' in the body
element.

											
										
										
											2023-05-28 15:04:40 +12:00
+								    };
 								    input = replace_non_ascii_hex(input);
-												LibWeb/HTML: Update legacy color parsing steps to match spec

											
										
										
											2024-10-31 12:30:04 +00:00
+								    // 10. While input's code point length is zero or not a multiple of three, append U+0030 (0) to input.
-												LibWeb: Add Web::HTML::parse_legacy_color_value

This function follows the "rules for parsing a legacy color value"
which is used in some legacy attributes, such as 'bgcolor' in the body
element.

											
										
										
											2023-05-28 15:04:40 +12:00
+								    StringBuilder builder;
 								    builder.append(input);
 								    while (builder.length() == 0 || (builder.length() % 3 != 0))
 								        builder.append_code_point('0');
-												Everywhere: Rename {Deprecated => Byte}String

This commit un-deprecates DeprecatedString, and repurposes it as a byte
string.
As the null state has already been removed, there are no other
particularly hairy blockers in repurposing this type as a byte string
(what it _really_ is).

This commit is auto-generated:
  $ xs=$(ack -l \bDeprecatedString\b\|deprecated_string AK Userland \
    Meta Ports Ladybird Tests Kernel)
  $ perl -pie 's/\bDeprecatedString\b/ByteString/g;
    s/deprecated_string/byte_string/g' $xs
  $ clang-format --style=file -i \
    $(git diff --name-only | grep \.cpp\|\.h)
  $ gn format $(git ls-files '*.gn' '*.gni')

											
										
										
											2023-12-16 17:49:34 +03:30
+								    input = builder.to_byte_string();
-												LibWeb: Add Web::HTML::parse_legacy_color_value

This function follows the "rules for parsing a legacy color value"
which is used in some legacy attributes, such as 'bgcolor' in the body
element.

											
										
										
											2023-05-28 15:04:40 +12:00
-												LibWeb/HTML: Update legacy color parsing steps to match spec

											
										
										
											2024-10-31 12:30:04 +00:00
+								    // 11. Split input into three strings of equal code point length, to obtain three components. Let length be the code point length that all of those components have (one third the code point length of input).
-												LibWeb: Add Web::HTML::parse_legacy_color_value

This function follows the "rules for parsing a legacy color value"
which is used in some legacy attributes, such as 'bgcolor' in the body
element.

											
										
										
											2023-05-28 15:04:40 +12:00
+								    auto length = input.length() / 3;
 								    auto first_component = input.substring_view(0, length);
 								    auto second_component = input.substring_view(length, length);
 								    auto third_component = input.substring_view(length * 2, length);
-												LibWeb/HTML: Update legacy color parsing steps to match spec

											
										
										
											2024-10-31 12:30:04 +00:00
+								    // 12. If length is greater than 8, then remove the leading length-8 characters in each component, and let length be 8.
-												LibWeb: Add Web::HTML::parse_legacy_color_value

This function follows the "rules for parsing a legacy color value"
which is used in some legacy attributes, such as 'bgcolor' in the body
element.

											
										
										
											2023-05-28 15:04:40 +12:00
+								    if (length > 8) {
 								        first_component = first_component.substring_view(length - 8);
 								        second_component = second_component.substring_view(length - 8);
 								        third_component = third_component.substring_view(length - 8);
 								        length = 8;
 								    }
-												LibWeb/HTML: Update legacy color parsing steps to match spec

											
										
										
											2024-10-31 12:30:04 +00:00
+								    // 13. While length is greater than two and the first character in each component is U+0030 (0), remove that character and reduce length by one.
-												LibWeb: Add Web::HTML::parse_legacy_color_value

This function follows the "rules for parsing a legacy color value"
which is used in some legacy attributes, such as 'bgcolor' in the body
element.

											
										
										
											2023-05-28 15:04:40 +12:00
+								    while (length > 2 && first_component[0] == '0' && second_component[0] == '0' && third_component[0] == '0') {
 								        --length;
 								        first_component = first_component.substring_view(1);
 								        second_component = second_component.substring_view(1);
 								        third_component = third_component.substring_view(1);
 								    }
-												LibWeb/HTML: Update legacy color parsing steps to match spec

											
										
										
											2024-10-31 12:30:04 +00:00
+								    // 14. If length is still greater than two, truncate each component, leaving only the first two characters in each.
-												LibWeb: Add Web::HTML::parse_legacy_color_value

This function follows the "rules for parsing a legacy color value"
which is used in some legacy attributes, such as 'bgcolor' in the body
element.

											
										
										
											2023-05-28 15:04:40 +12:00
+								    if (length > 2) {
 								        first_component = first_component.substring_view(0, 2);
 								        second_component = second_component.substring_view(0, 2);
 								        third_component = third_component.substring_view(0, 2);
 								    }
 								    auto to_hex = [&](StringView string) -> u8 {
-												LibWeb: Fix StringView OOB access when parsing 3-character legacy color

Found by Domato.

											
										
										
											2024-07-19 14:44:10 +02:00
+								        if (length == 1) {
 								            return hex_nibble_to_u8(string[0]);
 								        }
-												LibWeb: Add Web::HTML::parse_legacy_color_value

This function follows the "rules for parsing a legacy color value"
which is used in some legacy attributes, such as 'bgcolor' in the body
element.

											
										
										
											2023-05-28 15:04:40 +12:00
+								        auto nib1 = hex_nibble_to_u8(string[0]);
 								        auto nib2 = hex_nibble_to_u8(string[1]);
 								        return nib1 << 4 | nib2;
 								    };
-												LibWeb/HTML: Update legacy color parsing steps to match spec

											
										
										
											2024-10-31 12:30:04 +00:00
+								    // 15. Let result be a CSS color.
-												LibWeb: Add Web::HTML::parse_legacy_color_value

This function follows the "rules for parsing a legacy color value"
which is used in some legacy attributes, such as 'bgcolor' in the body
element.

											
										
										
											2023-05-28 15:04:40 +12:00
+								    Color result;
 								    result.set_alpha(0xFF);
-												LibWeb/HTML: Update legacy color parsing steps to match spec

											
										
										
											2024-10-31 12:30:04 +00:00
+								    // 16. Interpret the first component as a hexadecimal number; let the red component of result be the resulting number.
-												LibWeb: Add Web::HTML::parse_legacy_color_value

This function follows the "rules for parsing a legacy color value"
which is used in some legacy attributes, such as 'bgcolor' in the body
element.

											
										
										
											2023-05-28 15:04:40 +12:00
+								    result.set_red(to_hex(first_component));
-												LibWeb/HTML: Update legacy color parsing steps to match spec

											
										
										
											2024-10-31 12:30:04 +00:00
+								    // 17. Interpret the second component as a hexadecimal number; let the green component of result be the resulting number.
-												LibWeb: Add Web::HTML::parse_legacy_color_value

This function follows the "rules for parsing a legacy color value"
which is used in some legacy attributes, such as 'bgcolor' in the body
element.

											
										
										
											2023-05-28 15:04:40 +12:00
+								    result.set_green(to_hex(second_component));
-												LibWeb/HTML: Update legacy color parsing steps to match spec

											
										
										
											2024-10-31 12:30:04 +00:00
+								    // 18. Interpret the third component as a hexadecimal number; let the blue component of result be the resulting number.
-												LibWeb: Add Web::HTML::parse_legacy_color_value

This function follows the "rules for parsing a legacy color value"
which is used in some legacy attributes, such as 'bgcolor' in the body
element.

											
										
										
											2023-05-28 15:04:40 +12:00
+								    result.set_blue(to_hex(third_component));
-												LibWeb/HTML: Update legacy color parsing steps to match spec

											
										
										
											2024-10-31 12:30:04 +00:00
+								    // 19. Return result.
-												LibWeb: Add Web::HTML::parse_legacy_color_value

This function follows the "rules for parsing a legacy color value"
which is used in some legacy attributes, such as 'bgcolor' in the body
element.

											
										
										
											2023-05-28 15:04:40 +12:00
+								    return result;
 								}
-												LibWeb/HTML: Support `align` attributes on table sections and rows

thead, tbody, tfoot, tr, td, and th all have an `align` presentational
attribute with identical definitions. We previously only supported it
for td and th, and also allowed arbitrary text-align values instead of
the 4 dictated by the spec.

											
										
										
											2026-04-30 11:15:16 +01:00
+								// https://html.spec.whatwg.org/multipage/rendering.html#tables-2
 								RefPtr<CSS::StyleValue const> parse_table_child_element_align_value(StringView string_view)
 								{
 								    // Translates the `align` attribute value of a thead, tbody, tfoot, tr, td, or th element into the keyword used
 								    // for its 'text-align' presentational hint. Comparison is ASCII case-insensitive; values the spec does not
 								    // list yield nullptr.
 								    auto matches = [&](StringView candidate) {
 								        return string_view.equals_ignoring_ascii_case(candidate);
 								    };
 								    // "center" / "middle": center text within the element and align descendants to the center.
 								    if (matches("center"sv) || matches("middle"sv))
 								        return CSS::KeywordStyleValue::create(CSS::Keyword::LibwebCenter);
 								    // "left": left-align text within the element and align descendants to the left.
 								    if (matches("left"sv))
 								        return CSS::KeywordStyleValue::create(CSS::Keyword::LibwebLeft);
 								    // "right": right-align text within the element and align descendants to the right.
 								    if (matches("right"sv))
 								        return CSS::KeywordStyleValue::create(CSS::Keyword::LibwebRight);
 								    // "justify": full-justify text within the element; descendants are aligned to the left.
 								    if (matches("justify"sv))
 								        return CSS::KeywordStyleValue::create(CSS::Keyword::Justify);
 								    // Any other value is not one of the spec's recognized keywords.
 								    return nullptr;
 								}
-												LibWeb+LibJS: Make the EventTarget hierarchy (incl. DOM) GC-allocated

This is a monster patch that turns all EventTargets into GC-allocated
PlatformObjects. Their C++ wrapper classes are removed, and the LibJS
garbage collector is now responsible for their lifetimes.

There's a fair amount of hacks and band-aids in this patch, and we'll
have a lot of cleanup to do after this.

											
										
										
											2022-08-28 13:42:07 +02:00
+								JS::Realm& HTMLParser::realm()
 								{
 								    // The parser's realm is the realm of the document held in m_document.
 								    auto& document = *m_document;
 								    return document.realm();
 								}
-												LibWeb: Implement the speculative HTML parser

When the HTML parser blocks on a synchronous external script, run a
separate tokenizer over the unparsed input and issue speculative fetches
for the resources it finds (script src, link rel=stylesheet|preload, img
src), with <base href> tracking and template/foreign-content skipping.

Also fills in the previously-stubbed "consume a preloaded resource"
algorithm and the document's "map of preloaded resources", so that
<link rel="preload"> followed by a matching consumer deduplicates to
a single fetch.

											
										
										
											2026-04-26 03:21:39 +02:00
+								// https://html.spec.whatwg.org/multipage/parsing.html#start-the-speculative-html-parser
 								// Stops any speculative parser that is already active, then creates a new SpeculativeHTMLParser from realm(), the
 								// document, m_tokenizer.unparsed_input() and the document's base_url(), records it as the active one, and runs it.
 								void HTMLParser::start_the_speculative_html_parser()
 								{
 								    // 1. Optionally, return.
 								    // NOTE: We do not opt out.
 								    // 2. If parser's active speculative HTML parser is not null, then stop the speculative HTML parser for parser.
 								    if (m_active_speculative_html_parser)
 								        stop_the_speculative_html_parser();
 								    // 3. Let speculativeParser be a new speculative HTML parser, with the same state as parser.
 								    // 4. Let speculativeDoc be a new isomorphic representation of parser's Document, where all elements are instead
 								    //    speculative mock elements. Let speculativeParser parse into speculativeDoc.
 								    // NOTE: Speculative mock elements are produced on the fly during run(); we do not materialize a full speculativeDoc tree.
 								    auto speculative_parser = SpeculativeHTMLParser::create(realm(), *m_document, m_tokenizer.unparsed_input(), m_document->base_url());
 								    // 5. Set parser's active speculative HTML parser to speculativeParser.
 								    m_active_speculative_html_parser = speculative_parser;
 								    // 6. In parallel, run speculativeParser until it is stopped or until it reaches the end of its input stream.
 								    // NOTE(review): run() is invoked directly here; whether it defers its work internally is not visible from this
 								    //               function. Confirm against SpeculativeHTMLParser::run().
 								    speculative_parser->run();
 								}
 								// https://html.spec.whatwg.org/multipage/parsing.html#stop-the-speculative-html-parser
 								// Returns early when no speculative parser is active. Otherwise calls stop() on the active speculative parser and
 								// then clears m_active_speculative_html_parser.
 								void HTMLParser::stop_the_speculative_html_parser()
 								{
 								    // 1. Let speculativeParser be parser's active speculative HTML parser.
 								    // NOTE: This is a local copy of the member; the member itself is only cleared after stop() has returned.
 								    auto speculative_parser = m_active_speculative_html_parser;
 								    // 2. If speculativeParser is null, then return.
 								    if (!speculative_parser)
 								        return;
 								    // 3. Throw away any pending content in speculativeParser's input stream, and discard any future content that would
 								    //    have been added to it.
 								    speculative_parser->stop();
 								    // 4. Set parser's active speculative HTML parser to null.
 								    m_active_speculative_html_parser = nullptr;
 								}
-												LibWeb: Implement aborting the HTML parser

This is roughly on-spec, although I had to invent a simple "aborted"
state for the tokenizer.

											
										
										
											2022-09-20 21:08:14 +02:00
+								// https://html.spec.whatwg.org/multipage/parsing.html#abort-a-parser
 								void HTMLParser::abort()
 								{
 								    // 1. Throw away any pending content in the input stream, and discard any future content that would have been added to it.
 								    m_tokenizer.abort();
-												LibWeb: Implement the speculative HTML parser

When the HTML parser blocks on a synchronous external script, run a
separate tokenizer over the unparsed input and issue speculative fetches
for the resources it finds (script src, link rel=stylesheet|preload, img
src), with <base href> tracking and template/foreign-content skipping.

Also fills in the previously-stubbed "consume a preloaded resource"
algorithm and the document's "map of preloaded resources", so that
<link rel="preload"> followed by a matching consumer deduplicates to
a single fetch.

											
										
										
											2026-04-26 03:21:39 +02:00
+								    // 2. Stop the speculative HTML parser for this HTML parser.
 								    stop_the_speculative_html_parser();
-												LibWeb: Implement aborting the HTML parser

This is roughly on-spec, although I had to invent a simple "aborted"
state for the tokenizer.

											
										
										
											2022-09-20 21:08:14 +02:00
 								    // 3. Update the current document readiness to "interactive".
 								    m_document->update_readiness(DocumentReadyState::Interactive);
 								    // 4. Pop all the nodes off the stack of open elements.
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
+								    pop_all_open_elements();
-												LibWeb: Implement aborting the HTML parser

This is roughly on-spec, although I had to invent a simple "aborted"
state for the tokenizer.

											
										
										
											2022-09-20 21:08:14 +02:00
 								    // 5. Update the current document readiness to "complete".
 								    m_document->update_readiness(DocumentReadyState::Complete);
 								    m_aborted = true;
 								}
-												LibWeb: Add Rust HTML parser host plumbing

Add the C++ and Rust scaffolding that lets the tree builder live in
Rust while the DOM remains owned by LibWeb. Keep the exported surface
small: Rust stores parser state, and C++ provides node creation,
insertion, script, template, and GC hooks.

Route dump-html-tree through the selectable parser backend so the new
implementation can be exercised beside the existing parser while it is
being brought up.

											
										
										
											2026-05-15 20:29:23 +02:00
+								static StringView html_parser_ffi_string_view(u8 const* ptr, size_t len)
 								{
 								    // Wraps a (pointer, length) pair in a StringView. A null pointer or a zero length both map to a
 								    // default-constructed view, so `ptr` is only used when there are bytes to view.
 								    bool const has_bytes = ptr != nullptr && len != 0;
 								    return has_bytes ? StringView { ptr, len } : StringView {};
 								}
 								static FlyString fly_string_from_html_parser_ffi(u8 const* ptr, size_t len)
 								{
 								    // Builds a FlyString from the given bytes. The from_utf8() result is unwrapped with MUST(), so a conversion
 								    // failure is not reported back to the caller.
 								    auto const bytes = html_parser_ffi_string_view(ptr, len);
 								    return MUST(FlyString::from_utf8(bytes));
 								}
 								static String string_from_html_parser_ffi(u8 const* ptr, size_t len)
 								{
 								    // Builds a String from the given bytes. The from_utf8() result is unwrapped with MUST(), so a conversion
 								    // failure is not reported back to the caller.
 								    auto const bytes = html_parser_ffi_string_view(ptr, len);
 								    return MUST(String::from_utf8(bytes));
 								}
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
+								extern "C" void ladybird_html_parser_log_parse_error(void* parser, u8 const* message_ptr, size_t message_len)
 								{
 								    // C-linkage hook that writes a parse error message, passed as a (pointer, length) pair, to the debug log.
 								    // The returned parser reference is discarded; the call is kept for the VERIFY(parser) check it performs.
 								    (void)parser_from_html_parser_ffi(parser);
 								    // The log line is gated on HTML_PARSER_DEBUG via dbgln_if.
 								    dbgln_if(HTML_PARSER_DEBUG, "Rust parser parse error: {}", html_parser_ffi_string_view(message_ptr, message_len));
 								}
 								extern "C" void ladybird_html_parser_stop_parsing(void* parser)
 								{
 								    // Recover the HTMLParser behind the opaque handle and forward the stop request to it.
 								    auto& html_parser = parser_from_html_parser_ffi(parser);
 								    html_parser.stop_parsing_from_rust_parser();
 								}
 								extern "C" bool ladybird_html_parser_parse_errors_enabled()
 								{
 								    // Reports the HTML_PARSER_DEBUG setting, the same flag that gates the output of
 								    // ladybird_html_parser_log_parse_error().
 								    bool const parse_errors_enabled = HTML_PARSER_DEBUG;
 								    return parse_errors_enabled;
 								}
 								extern "C" void ladybird_html_parser_visit_node(void* visitor, size_t node)
 								{
 								    // `visitor` is an opaque GC::Cell::Visitor and `node` an opaque node handle; a handle of 0 means there is
 								    // nothing to visit.
 								    if (node != 0) {
 								        auto* cell_visitor = static_cast<GC::Cell::Visitor*>(visitor);
 								        cell_visitor->visit(node_from_html_parser_ffi(node));
 								    }
 								}
 								static Optional<FlyString> namespace_from_html_parser_ffi(RustFfiHtmlNamespace namespace_, u8 const* namespace_uri_ptr, size_t namespace_uri_len)
-												LibWeb: Add Rust HTML parser host plumbing

Add the C++ and Rust scaffolding that lets the tree builder live in
Rust while the DOM remains owned by LibWeb. Keep the exported surface
small: Rust stores parser state, and C++ provides node creation,
insertion, script, template, and GC hooks.

Route dump-html-tree through the selectable parser backend so the new
implementation can be exercised beside the existing parser while it is
being brought up.

											
										
										
											2026-05-15 20:29:23 +02:00
+								{
 								    switch (namespace_) {
-												LibWeb: Add initial Rust HTML tree construction

Implement the first Rust tree builder pass around the tokenizer and the
LibWeb DOM host hooks. Cover the document setup, insertion-mode
dispatch, ordinary body insertion, basic table handling, active
formatting element reconstruction, and foreign-content routing.

Leave the C++ parser available at runtime so the new path can be tested
against the old implementation while the remaining tree-construction
algorithms are filled in.

											
										
										
											2026-05-15 20:39:06 +02:00
+								    case RustFfiHtmlNamespace::Html:
-												LibWeb: Add Rust HTML parser host plumbing

Add the C++ and Rust scaffolding that lets the tree builder live in
Rust while the DOM remains owned by LibWeb. Keep the exported surface
small: Rust stores parser state, and C++ provides node creation,
insertion, script, template, and GC hooks.

Route dump-html-tree through the selectable parser backend so the new
implementation can be exercised beside the existing parser while it is
being brought up.

											
										
										
											2026-05-15 20:29:23 +02:00
+								        return Namespace::HTML;
-												LibWeb: Add initial Rust HTML tree construction

Implement the first Rust tree builder pass around the tokenizer and the
LibWeb DOM host hooks. Cover the document setup, insertion-mode
dispatch, ordinary body insertion, basic table handling, active
formatting element reconstruction, and foreign-content routing.

Leave the C++ parser available at runtime so the new path can be tested
against the old implementation while the remaining tree-construction
algorithms are filled in.

											
										
										
											2026-05-15 20:39:06 +02:00
+								    case RustFfiHtmlNamespace::MathMl:
-												LibWeb: Add Rust HTML parser host plumbing

Add the C++ and Rust scaffolding that lets the tree builder live in
Rust while the DOM remains owned by LibWeb. Keep the exported surface
small: Rust stores parser state, and C++ provides node creation,
insertion, script, template, and GC hooks.

Route dump-html-tree through the selectable parser backend so the new
implementation can be exercised beside the existing parser while it is
being brought up.

											
										
										
											2026-05-15 20:29:23 +02:00
+								        return Namespace::MathML;
-												LibWeb: Add initial Rust HTML tree construction

Implement the first Rust tree builder pass around the tokenizer and the
LibWeb DOM host hooks. Cover the document setup, insertion-mode
dispatch, ordinary body insertion, basic table handling, active
formatting element reconstruction, and foreign-content routing.

Leave the C++ parser available at runtime so the new path can be tested
against the old implementation while the remaining tree-construction
algorithms are filled in.

											
										
										
											2026-05-15 20:39:06 +02:00
+								    case RustFfiHtmlNamespace::Svg:
-												LibWeb: Add Rust HTML parser host plumbing

Add the C++ and Rust scaffolding that lets the tree builder live in
Rust while the DOM remains owned by LibWeb. Keep the exported surface
small: Rust stores parser state, and C++ provides node creation,
insertion, script, template, and GC hooks.

Route dump-html-tree through the selectable parser backend so the new
implementation can be exercised beside the existing parser while it is
being brought up.

											
										
										
											2026-05-15 20:29:23 +02:00
+								        return Namespace::SVG;
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
+								    case RustFfiHtmlNamespace::Other:
 								        if (namespace_uri_len == 0)
 								            return {};
 								        return fly_string_from_html_parser_ffi(namespace_uri_ptr, namespace_uri_len);
-												LibWeb: Add Rust HTML parser host plumbing

Add the C++ and Rust scaffolding that lets the tree builder live in
Rust while the DOM remains owned by LibWeb. Keep the exported surface
small: Rust stores parser state, and C++ provides node creation,
insertion, script, template, and GC hooks.

Route dump-html-tree through the selectable parser backend so the new
implementation can be exercised beside the existing parser while it is
being brought up.

											
										
										
											2026-05-15 20:29:23 +02:00
+								    }
 								    VERIFY_NOT_REACHED();
 								}
-												LibWeb: Wire Rust parser scripts and fragments

Preserve Rust parser state across tokenizer runs and stop cleanly when
a parser-blocking script has to execute. Thread the pending script back
through the existing C++ parser entry point so document.write(), input
insertion points, and script bookkeeping continue to use the normal
LibWeb machinery.

Add the fragment parser setup needed by innerHTML and contextual
fragment parsing, including context elements, form ownership, tokenizer
state selection, text coalescing, and foreign-content integration.

											
										
										
											2026-05-15 20:57:22 +02:00
+								static Optional<FlyString> attribute_namespace_from_html_parser_ffi(RustFfiHtmlAttributeNamespace namespace_)
 								{
 								    switch (namespace_) {
 								    case RustFfiHtmlAttributeNamespace::None:
 								        return {};
 								    case RustFfiHtmlAttributeNamespace::XLink:
 								        return Namespace::XLink;
 								    case RustFfiHtmlAttributeNamespace::Xml:
 								        return Namespace::XML;
 								    case RustFfiHtmlAttributeNamespace::Xmlns:
 								        return Namespace::XMLNS;
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
+								    case RustFfiHtmlAttributeNamespace::Other:
 								        // Only fragment context attributes use this sentinel; parser-created attributes do not cross this path with
 								        // arbitrary namespace URIs.
 								        VERIFY_NOT_REACHED();
-												LibWeb: Wire Rust parser scripts and fragments

Preserve Rust parser state across tokenizer runs and stop cleanly when
a parser-blocking script has to execute. Thread the pending script back
through the existing C++ parser entry point so document.write(), input
insertion points, and script bookkeeping continue to use the normal
LibWeb machinery.

Add the fragment parser setup needed by innerHTML and contextual
fragment parsing, including context elements, form ownership, tokenizer
state selection, text coalescing, and foreign-content integration.

											
										
										
											2026-05-15 20:57:22 +02:00
+								    }
 								    VERIFY_NOT_REACHED();
 								}
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
+								static RustFfiHtmlAttributeNamespace attribute_namespace_to_html_parser_ffi(Optional<FlyString> const& namespace_)
 								{
 								    // Converts an optional attribute namespace URI into RustFfiHtmlAttributeNamespace: no namespace maps to None,
 								    // the XLink/XML/XMLNS namespaces map to their dedicated values, and every other URI collapses to Other.
 								    if (!namespace_.has_value())
 								        return RustFfiHtmlAttributeNamespace::None;
 								    auto const& uri = namespace_.value();
 								    if (uri == Namespace::XLink)
 								        return RustFfiHtmlAttributeNamespace::XLink;
 								    if (uri == Namespace::XML)
 								        return RustFfiHtmlAttributeNamespace::Xml;
 								    if (uri == Namespace::XMLNS)
 								        return RustFfiHtmlAttributeNamespace::Xmlns;
 								    return RustFfiHtmlAttributeNamespace::Other;
 								}
-												LibWeb: Wire Rust parser scripts and fragments

Preserve Rust parser state across tokenizer runs and stop cleanly when
a parser-blocking script has to execute. Thread the pending script back
through the existing C++ parser entry point so document.write(), input
insertion points, and script bookkeeping continue to use the normal
LibWeb machinery.

Add the fragment parser setup needed by innerHTML and contextual
fragment parsing, including context elements, form ownership, tokenizer
state selection, text coalescing, and foreign-content integration.

											
										
										
											2026-05-15 20:57:22 +02:00
+								static RustFfiHtmlNamespace namespace_to_html_parser_ffi(Optional<FlyString> const& namespace_)
 								{
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
+								    if (!namespace_.has_value())
 								        return RustFfiHtmlNamespace::Other;
 								    if (namespace_ == Namespace::HTML)
 								        return RustFfiHtmlNamespace::Html;
-												LibWeb: Wire Rust parser scripts and fragments

Preserve Rust parser state across tokenizer runs and stop cleanly when
a parser-blocking script has to execute. Thread the pending script back
through the existing C++ parser entry point so document.write(), input
insertion points, and script bookkeeping continue to use the normal
LibWeb machinery.

Add the fragment parser setup needed by innerHTML and contextual
fragment parsing, including context elements, form ownership, tokenizer
state selection, text coalescing, and foreign-content integration.

											
										
										
											2026-05-15 20:57:22 +02:00
+								    if (namespace_ == Namespace::MathML)
 								        return RustFfiHtmlNamespace::MathMl;
 								    if (namespace_ == Namespace::SVG)
 								        return RustFfiHtmlNamespace::Svg;
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
+								    return RustFfiHtmlNamespace::Other;
-												LibWeb: Wire Rust parser scripts and fragments

Preserve Rust parser state across tokenizer runs and stop cleanly when
a parser-blocking script has to execute. Thread the pending script back
through the existing C++ parser entry point so document.write(), input
insertion points, and script bookkeeping continue to use the normal
LibWeb machinery.

Add the fragment parser setup needed by innerHTML and contextual
fragment parsing, including context elements, form ownership, tokenizer
state selection, text coalescing, and foreign-content integration.

											
										
										
											2026-05-15 20:57:22 +02:00
+								}
-												LibWeb: Add initial Rust HTML tree construction

Implement the first Rust tree builder pass around the tokenizer and the
LibWeb DOM host hooks. Cover the document setup, insertion-mode
dispatch, ordinary body insertion, basic table handling, active
formatting element reconstruction, and foreign-content routing.

Leave the C++ parser available at runtime so the new path can be tested
against the old implementation while the remaining tree-construction
algorithms are filled in.

											
										
										
											2026-05-15 20:39:06 +02:00
+								static DOM::QuirksMode quirks_mode_from_html_parser_ffi(RustFfiHtmlQuirksMode mode)
-												LibWeb: Add Rust HTML parser host plumbing

Add the C++ and Rust scaffolding that lets the tree builder live in
Rust while the DOM remains owned by LibWeb. Keep the exported surface
small: Rust stores parser state, and C++ provides node creation,
insertion, script, template, and GC hooks.

Route dump-html-tree through the selectable parser backend so the new
implementation can be exercised beside the existing parser while it is
being brought up.

											
										
										
											2026-05-15 20:29:23 +02:00
+								{
 								    switch (mode) {
-												LibWeb: Add initial Rust HTML tree construction

Implement the first Rust tree builder pass around the tokenizer and the
LibWeb DOM host hooks. Cover the document setup, insertion-mode
dispatch, ordinary body insertion, basic table handling, active
formatting element reconstruction, and foreign-content routing.

Leave the C++ parser available at runtime so the new path can be tested
against the old implementation while the remaining tree-construction
algorithms are filled in.

											
										
										
											2026-05-15 20:39:06 +02:00
+								    case RustFfiHtmlQuirksMode::No:
-												LibWeb: Add Rust HTML parser host plumbing

Add the C++ and Rust scaffolding that lets the tree builder live in
Rust while the DOM remains owned by LibWeb. Keep the exported surface
small: Rust stores parser state, and C++ provides node creation,
insertion, script, template, and GC hooks.

Route dump-html-tree through the selectable parser backend so the new
implementation can be exercised beside the existing parser while it is
being brought up.

											
										
										
											2026-05-15 20:29:23 +02:00
+								        return DOM::QuirksMode::No;
-												LibWeb: Add initial Rust HTML tree construction

Implement the first Rust tree builder pass around the tokenizer and the
LibWeb DOM host hooks. Cover the document setup, insertion-mode
dispatch, ordinary body insertion, basic table handling, active
formatting element reconstruction, and foreign-content routing.

Leave the C++ parser available at runtime so the new path can be tested
against the old implementation while the remaining tree-construction
algorithms are filled in.

											
										
										
											2026-05-15 20:39:06 +02:00
+								    case RustFfiHtmlQuirksMode::Limited:
-												LibWeb: Add Rust HTML parser host plumbing

Add the C++ and Rust scaffolding that lets the tree builder live in
Rust while the DOM remains owned by LibWeb. Keep the exported surface
small: Rust stores parser state, and C++ provides node creation,
insertion, script, template, and GC hooks.

Route dump-html-tree through the selectable parser backend so the new
implementation can be exercised beside the existing parser while it is
being brought up.

											
										
										
											2026-05-15 20:29:23 +02:00
+								        return DOM::QuirksMode::Limited;
-												LibWeb: Add initial Rust HTML tree construction

Implement the first Rust tree builder pass around the tokenizer and the
LibWeb DOM host hooks. Cover the document setup, insertion-mode
dispatch, ordinary body insertion, basic table handling, active
formatting element reconstruction, and foreign-content routing.

Leave the C++ parser available at runtime so the new path can be tested
against the old implementation while the remaining tree-construction
algorithms are filled in.

											
										
										
											2026-05-15 20:39:06 +02:00
+								    case RustFfiHtmlQuirksMode::Yes:
-												LibWeb: Add Rust HTML parser host plumbing

Add the C++ and Rust scaffolding that lets the tree builder live in
Rust while the DOM remains owned by LibWeb. Keep the exported surface
small: Rust stores parser state, and C++ provides node creation,
insertion, script, template, and GC hooks.

Route dump-html-tree through the selectable parser backend so the new
implementation can be exercised beside the existing parser while it is
being brought up.

											
										
										
											2026-05-15 20:29:23 +02:00
+								        return DOM::QuirksMode::Yes;
 								    }
 								    VERIFY_NOT_REACHED();
 								}
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
+								static RustFfiHtmlQuirksMode quirks_mode_to_html_parser_ffi(DOM::QuirksMode mode)
 								{
 								    // One-to-one translation of DOM::QuirksMode into the identically named RustFfiHtmlQuirksMode value.
 								    switch (mode) {
 								    case DOM::QuirksMode::No:
 								        return RustFfiHtmlQuirksMode::No;
 								    case DOM::QuirksMode::Limited:
 								        return RustFfiHtmlQuirksMode::Limited;
 								    case DOM::QuirksMode::Yes:
 								        return RustFfiHtmlQuirksMode::Yes;
 								    }
 								    // Only reached if `mode` holds a value that matches none of the cases above.
 								    VERIFY_NOT_REACHED();
 								}
-												LibWeb: Add Rust HTML parser host plumbing

Add the C++ and Rust scaffolding that lets the tree builder live in
Rust while the DOM remains owned by LibWeb. Keep the exported surface
small: Rust stores parser state, and C++ provides node creation,
insertion, script, template, and GC hooks.

Route dump-html-tree through the selectable parser backend so the new
implementation can be exercised beside the existing parser while it is
being brought up.

											
										
										
											2026-05-15 20:29:23 +02:00
+								static HTMLParser& parser_from_html_parser_ffi(void* parser)
 								{
 								    VERIFY(parser);
 								    return *reinterpret_cast<HTMLParser*>(parser);
 								}
-												LibWeb: Add initial Rust HTML tree construction

Implement the first Rust tree builder pass around the tokenizer and the
LibWeb DOM host hooks. Cover the document setup, insertion-mode
dispatch, ordinary body insertion, basic table handling, active
formatting element reconstruction, and foreign-content routing.

Leave the C++ parser available at runtime so the new path can be tested
against the old implementation while the remaining tree-construction
algorithms are filled in.

											
										
										
											2026-05-15 20:39:06 +02:00
+								static DOM::Node& node_from_html_parser_ffi(size_t node)
-												LibWeb: Add Rust HTML parser host plumbing

Add the C++ and Rust scaffolding that lets the tree builder live in
Rust while the DOM remains owned by LibWeb. Keep the exported surface
small: Rust stores parser state, and C++ provides node creation,
insertion, script, template, and GC hooks.

Route dump-html-tree through the selectable parser backend so the new
implementation can be exercised beside the existing parser while it is
being brought up.

											
										
										
											2026-05-15 20:29:23 +02:00
+								{
 								    VERIFY(node);
 								    return *reinterpret_cast<DOM::Node*>(node);
 								}
-												LibWeb: Add initial Rust HTML tree construction

Implement the first Rust tree builder pass around the tokenizer and the
LibWeb DOM host hooks. Cover the document setup, insertion-mode
dispatch, ordinary body insertion, basic table handling, active
formatting element reconstruction, and foreign-content routing.

Leave the C++ parser available at runtime so the new path can be tested
against the old implementation while the remaining tree-construction
algorithms are filled in.

											
										
										
											2026-05-15 20:39:06 +02:00
+								extern "C" size_t ladybird_html_parser_document_node(void* parser)
-												LibWeb: Add Rust HTML parser host plumbing

Add the C++ and Rust scaffolding that lets the tree builder live in
Rust while the DOM remains owned by LibWeb. Keep the exported surface
small: Rust stores parser state, and C++ provides node creation,
insertion, script, template, and GC hooks.

Route dump-html-tree through the selectable parser backend so the new
implementation can be exercised beside the existing parser while it is
being brought up.

											
										
										
											2026-05-15 20:29:23 +02:00
+								{
-												LibWeb: Add initial Rust HTML tree construction

Implement the first Rust tree builder pass around the tokenizer and the
LibWeb DOM host hooks. Cover the document setup, insertion-mode
dispatch, ordinary body insertion, basic table handling, active
formatting element reconstruction, and foreign-content routing.

Leave the C++ parser available at runtime so the new path can be tested
against the old implementation while the remaining tree-construction
algorithms are filled in.

											
										
										
											2026-05-15 20:39:06 +02:00
+								    return reinterpret_cast<size_t>(&parser_from_html_parser_ffi(parser).document());
-												LibWeb: Add Rust HTML parser host plumbing

Add the C++ and Rust scaffolding that lets the tree builder live in
Rust while the DOM remains owned by LibWeb. Keep the exported surface
small: Rust stores parser state, and C++ provides node creation,
insertion, script, template, and GC hooks.

Route dump-html-tree through the selectable parser backend so the new
implementation can be exercised beside the existing parser while it is
being brought up.

											
										
										
											2026-05-15 20:29:23 +02:00
+								}
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
+								extern "C" size_t ladybird_html_parser_document_html_element(void* parser)
 								{
 								    auto* html_element = parser_from_html_parser_ffi(parser).document().document_element();
 								    if (!html_element || !is<HTMLHtmlElement>(*html_element))
 								        return 0;
 								    return reinterpret_cast<size_t>(html_element);
 								}
-												LibWeb: Add initial Rust HTML tree construction

Implement the first Rust tree builder pass around the tokenizer and the
LibWeb DOM host hooks. Cover the document setup, insertion-mode
dispatch, ordinary body insertion, basic table handling, active
formatting element reconstruction, and foreign-content routing.

Leave the C++ parser available at runtime so the new path can be tested
against the old implementation while the remaining tree-construction
algorithms are filled in.

											
										
										
											2026-05-15 20:39:06 +02:00
+								extern "C" void ladybird_html_parser_set_document_quirks_mode(void* parser, RustFfiHtmlQuirksMode mode)
-												LibWeb: Add Rust HTML parser host plumbing

Add the C++ and Rust scaffolding that lets the tree builder live in
Rust while the DOM remains owned by LibWeb. Keep the exported surface
small: Rust stores parser state, and C++ provides node creation,
insertion, script, template, and GC hooks.

Route dump-html-tree through the selectable parser backend so the new
implementation can be exercised beside the existing parser while it is
being brought up.

											
										
										
											2026-05-15 20:29:23 +02:00
+								{
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
+								    auto& document = parser_from_html_parser_ffi(parser).document();
 								    if (!document.parser_cannot_change_the_mode())
 								        document.set_quirks_mode(quirks_mode_from_html_parser_ffi(mode));
-												LibWeb: Add Rust HTML parser host plumbing

Add the C++ and Rust scaffolding that lets the tree builder live in
Rust while the DOM remains owned by LibWeb. Keep the exported surface
small: Rust stores parser state, and C++ provides node creation,
insertion, script, template, and GC hooks.

Route dump-html-tree through the selectable parser backend so the new
implementation can be exercised beside the existing parser while it is
being brought up.

											
										
										
											2026-05-15 20:29:23 +02:00
+								}
-												LibWeb: Add initial Rust HTML tree construction

Implement the first Rust tree builder pass around the tokenizer and the
LibWeb DOM host hooks. Cover the document setup, insertion-mode
dispatch, ordinary body insertion, basic table handling, active
formatting element reconstruction, and foreign-content routing.

Leave the C++ parser available at runtime so the new path can be tested
against the old implementation while the remaining tree-construction
algorithms are filled in.

											
										
										
											2026-05-15 20:39:06 +02:00
+								extern "C" size_t ladybird_html_parser_create_document_type(void* parser, u8 const* name_ptr, size_t name_len, u8 const* public_id_ptr, size_t public_id_len, u8 const* system_id_ptr, size_t system_id_len)
-												LibWeb: Add Rust HTML parser host plumbing

Add the C++ and Rust scaffolding that lets the tree builder live in
Rust while the DOM remains owned by LibWeb. Keep the exported surface
small: Rust stores parser state, and C++ provides node creation,
insertion, script, template, and GC hooks.

Route dump-html-tree through the selectable parser backend so the new
implementation can be exercised beside the existing parser while it is
being brought up.

											
										
										
											2026-05-15 20:29:23 +02:00
+								{
 								    auto& html_parser = parser_from_html_parser_ffi(parser);
 								    auto document_type = html_parser.document().realm().create<DOM::DocumentType>(html_parser.document());
 								    document_type->set_name(string_from_html_parser_ffi(name_ptr, name_len));
 								    document_type->set_public_id(string_from_html_parser_ffi(public_id_ptr, public_id_len));
 								    document_type->set_system_id(string_from_html_parser_ffi(system_id_ptr, system_id_len));
-												LibWeb: Add initial Rust HTML tree construction

Implement the first Rust tree builder pass around the tokenizer and the
LibWeb DOM host hooks. Cover the document setup, insertion-mode
dispatch, ordinary body insertion, basic table handling, active
formatting element reconstruction, and foreign-content routing.

Leave the C++ parser available at runtime so the new path can be tested
against the old implementation while the remaining tree-construction
algorithms are filled in.

											
										
										
											2026-05-15 20:39:06 +02:00
+								    return reinterpret_cast<size_t>(document_type.ptr());
-												LibWeb: Add Rust HTML parser host plumbing

Add the C++ and Rust scaffolding that lets the tree builder live in
Rust while the DOM remains owned by LibWeb. Keep the exported surface
small: Rust stores parser state, and C++ provides node creation,
insertion, script, template, and GC hooks.

Route dump-html-tree through the selectable parser backend so the new
implementation can be exercised beside the existing parser while it is
being brought up.

											
										
										
											2026-05-15 20:29:23 +02:00
+								}
-												LibWeb: Add initial Rust HTML tree construction

Implement the first Rust tree builder pass around the tokenizer and the
LibWeb DOM host hooks. Cover the document setup, insertion-mode
dispatch, ordinary body insertion, basic table handling, active
formatting element reconstruction, and foreign-content routing.

Leave the C++ parser available at runtime so the new path can be tested
against the old implementation while the remaining tree-construction
algorithms are filled in.

											
										
										
											2026-05-15 20:39:06 +02:00
+								extern "C" size_t ladybird_html_parser_create_comment(void* parser, u8 const* data_ptr, size_t data_len)
-												LibWeb: Add Rust HTML parser host plumbing

Add the C++ and Rust scaffolding that lets the tree builder live in
Rust while the DOM remains owned by LibWeb. Keep the exported surface
small: Rust stores parser state, and C++ provides node creation,
insertion, script, template, and GC hooks.

Route dump-html-tree through the selectable parser backend so the new
implementation can be exercised beside the existing parser while it is
being brought up.

											
										
										
											2026-05-15 20:29:23 +02:00
+								{
 								    auto& html_parser = parser_from_html_parser_ffi(parser);
 								    auto comment = html_parser.document().realm().create<DOM::Comment>(html_parser.document(), Utf16String::from_utf8(string_from_html_parser_ffi(data_ptr, data_len)));
-												LibWeb: Add initial Rust HTML tree construction

Implement the first Rust tree builder pass around the tokenizer and the
LibWeb DOM host hooks. Cover the document setup, insertion-mode
dispatch, ordinary body insertion, basic table handling, active
formatting element reconstruction, and foreign-content routing.

Leave the C++ parser available at runtime so the new path can be tested
against the old implementation while the remaining tree-construction
algorithms are filled in.

											
										
										
											2026-05-15 20:39:06 +02:00
+								    return reinterpret_cast<size_t>(comment.ptr());
-												LibWeb: Add Rust HTML parser host plumbing

Add the C++ and Rust scaffolding that lets the tree builder live in
Rust while the DOM remains owned by LibWeb. Keep the exported surface
small: Rust stores parser state, and C++ provides node creation,
insertion, script, template, and GC hooks.

Route dump-html-tree through the selectable parser backend so the new
implementation can be exercised beside the existing parser while it is
being brought up.

											
										
										
											2026-05-15 20:29:23 +02:00
+								}
-												LibWeb: Wire Rust parser scripts and fragments

Preserve Rust parser state across tokenizer runs and stop cleanly when
a parser-blocking script has to execute. Thread the pending script back
through the existing C++ parser entry point so document.write(), input
insertion points, and script bookkeeping continue to use the normal
LibWeb machinery.

Add the fragment parser setup needed by innerHTML and contextual
fragment parsing, including context elements, form ownership, tokenizer
state selection, text coalescing, and foreign-content integration.

											
										
										
											2026-05-15 20:57:22 +02:00
+								extern "C" void ladybird_html_parser_insert_text(size_t parent, size_t before, u8 const* data_ptr, size_t data_len)
-												LibWeb: Add Rust HTML parser host plumbing

Add the C++ and Rust scaffolding that lets the tree builder live in
Rust while the DOM remains owned by LibWeb. Keep the exported surface
small: Rust stores parser state, and C++ provides node creation,
insertion, script, template, and GC hooks.

Route dump-html-tree through the selectable parser backend so the new
implementation can be exercised beside the existing parser while it is
being brought up.

											
										
										
											2026-05-15 20:29:23 +02:00
+								{
-												LibWeb: Wire Rust parser scripts and fragments

Preserve Rust parser state across tokenizer runs and stop cleanly when
a parser-blocking script has to execute. Thread the pending script back
through the existing C++ parser entry point so document.write(), input
insertion points, and script bookkeeping continue to use the normal
LibWeb machinery.

Add the fragment parser setup needed by innerHTML and contextual
fragment parsing, including context elements, form ownership, tokenizer
state selection, text coalescing, and foreign-content integration.

											
										
										
											2026-05-15 20:57:22 +02:00
+								    auto& parent_node = node_from_html_parser_ffi(parent);
 								    if (parent_node.is_document())
 								        return;
 								    auto data = Utf16String::from_utf8(string_from_html_parser_ffi(data_ptr, data_len));
 								    if (before) {
 								        auto& before_node = node_from_html_parser_ffi(before);
 								        if (auto* previous_text = as_if<DOM::Text>(before_node.previous_sibling())) {
 								            (void)previous_text->append_data(data);
 								            return;
 								        }
 								        auto text = parent_node.document().realm().create<DOM::Text>(parent_node.document(), data);
 								        parent_node.insert_before(*text, &before_node);
 								        return;
 								    }
 								    if (auto* last_text = as_if<DOM::Text>(parent_node.last_child())) {
 								        (void)last_text->append_data(data);
 								        return;
 								    }
 								    auto text = parent_node.document().realm().create<DOM::Text>(parent_node.document(), data);
 								    MUST(parent_node.append_child(*text));
-												LibWeb: Add Rust HTML parser host plumbing

Add the C++ and Rust scaffolding that lets the tree builder live in
Rust while the DOM remains owned by LibWeb. Keep the exported surface
small: Rust stores parser state, and C++ provides node creation,
insertion, script, template, and GC hooks.

Route dump-html-tree through the selectable parser backend so the new
implementation can be exercised beside the existing parser while it is
being brought up.

											
										
										
											2026-05-15 20:29:23 +02:00
+								}
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
+								extern "C" void ladybird_html_parser_add_missing_attribute(size_t element, u8 const* local_name_ptr, size_t local_name_len, u8 const* value_ptr, size_t value_len)
 								{
 								    auto& dom_element = as<DOM::Element>(node_from_html_parser_ffi(element));
 								    auto local_name = fly_string_from_html_parser_ffi(local_name_ptr, local_name_len);
 								    if (dom_element.has_attribute(local_name))
 								        return;
 								    dom_element.append_attribute(local_name, string_from_html_parser_ffi(value_ptr, value_len));
 								}
 								// Removes the node identified by the handle from its tree.
 								// NOTE(review): the `true` argument is presumably DOM::Node::remove()'s suppress-observers flag - confirm.
 								extern "C" void ladybird_html_parser_remove_node(size_t node)
 								{
 								    auto& node_to_remove = node_from_html_parser_ffi(node);
 								    node_to_remove.remove(true);
 								}
 								// Hook invoked with the handle of an element that was popped; only option elements are acted upon.
 								extern "C" void ladybird_html_parser_handle_element_popped(size_t element)
 								{
 								    auto* option_element = as_if<HTML::HTMLOptionElement>(node_from_html_parser_ffi(element));
 								    if (!option_element)
 								        return;

 								    // https://html.spec.whatwg.org/multipage/form-elements.html#the-option-element
 								    // When an option element is popped off the stack of open elements of an HTML parser or XML parser,
 								    // the user agent must run maybe clone an option into selectedcontent given the option element.
 								    // AD-HOC: The Rust tree builder flushes buffered text before invoking this hook, so the option's
 								    // content is up-to-date before cloning.
 								    MUST(option_element->maybe_clone_into_selectedcontent());
 								}
 								// Forwards an SVG script element and its source line number to the parser's
 								// prepare_svg_script_for_rust_parser(). The handle must refer to an SVG::SVGScriptElement.
 								extern "C" void ladybird_html_parser_prepare_svg_script(void* parser, size_t element, size_t source_line_number)
 								{
 								    auto& html_parser = parser_from_html_parser_ffi(parser);
 								    auto& script_element = as<SVG::SVGScriptElement>(node_from_html_parser_ffi(element));
 								    html_parser.prepare_svg_script_for_rust_parser(script_element, source_line_number);
 								}
 								// Forwards an element and its source line number to the parser's
 								// set_script_source_line_from_rust_parser(). The handle must refer to a DOM::Element.
 								extern "C" void ladybird_html_parser_set_script_source_line(void* parser, size_t element, size_t source_line_number)
 								{
 								    auto& html_parser = parser_from_html_parser_ffi(parser);
 								    auto& script_element = as<DOM::Element>(node_from_html_parser_ffi(element));
 								    html_parser.set_script_source_line_from_rust_parser(script_element, source_line_number);
 								}
 								extern "C" void ladybird_html_parser_mark_script_already_started(void* parser, size_t element)
 								{
 								    if (auto* script = as_if<HTMLScriptElement>(node_from_html_parser_ffi(element)))
 								        parser_from_html_parser_ffi(parser).mark_script_already_started_from_rust_parser(*script);
 								}
 								// Returns the handle of the node's parent; a null parent pointer is returned as 0.
 								extern "C" size_t ladybird_html_parser_parent_node(size_t node)
 								{
 								    auto& child_node = node_from_html_parser_ffi(node);
 								    return reinterpret_cast<size_t>(child_node.parent());
 								}
 								// Creates an element on behalf of the Rust tree builder and returns it as a node handle.
 								// A start-tag HTMLToken is rebuilt from the local name and the FFI attribute array, then passed to
 								// HTMLParser::create_element_for_rust_parser() together with the namespace, the intended parent,
 								// the duplicate-attribute flag, the optional form element (0 means none) and the template-on-stack flag.
 								extern "C" size_t ladybird_html_parser_create_element(void* parser, size_t intended_parent, RustFfiHtmlNamespace namespace_, u8 const* namespace_uri_ptr, size_t namespace_uri_len, u8 const* local_name_ptr, size_t local_name_len, RustFfiHtmlParserAttribute const* attributes, size_t attribute_count, bool had_duplicate_attribute, size_t form_element, bool has_template_element_on_stack)
 								{
 								    auto& html_parser = parser_from_html_parser_ffi(parser);

 								    auto tag_name = fly_string_from_html_parser_ffi(local_name_ptr, local_name_len);
 								    auto start_tag = HTMLToken::make_start_tag(tag_name);

 								    // Copy each FFI attribute into the token, in the order received.
 								    for (size_t index = 0; index != attribute_count; ++index) {
 								        auto const& ffi_attribute = attributes[index];

 								        HTMLToken::Attribute parsed_attribute;
 								        // A zero-length prefix leaves the attribute's prefix unset.
 								        if (ffi_attribute.prefix_len != 0)
 								            parsed_attribute.prefix = fly_string_from_html_parser_ffi(ffi_attribute.prefix_ptr, ffi_attribute.prefix_len);
 								        parsed_attribute.local_name = fly_string_from_html_parser_ffi(ffi_attribute.local_name_ptr, ffi_attribute.local_name_len);
 								        parsed_attribute.namespace_ = attribute_namespace_from_html_parser_ffi(ffi_attribute.namespace_);
 								        parsed_attribute.value = string_from_html_parser_ffi(ffi_attribute.value_ptr, ffi_attribute.value_len);

 								        start_tag.add_attribute(move(parsed_attribute));
 								    }

 								    auto& parent_for_creation = node_from_html_parser_ffi(intended_parent);

 								    // A zero form handle means there is no form element to pass along.
 								    GC::Ptr<HTMLFormElement> owner_form;
 								    if (form_element != 0)
 								        owner_form = as<HTMLFormElement>(node_from_html_parser_ffi(form_element));

 								    auto created_element = html_parser.create_element_for_rust_parser(
 								        start_tag,
 								        namespace_from_html_parser_ffi(namespace_, namespace_uri_ptr, namespace_uri_len),
 								        parent_for_creation,
 								        had_duplicate_attribute,
 								        owner_form,
 								        has_template_element_on_stack);

 								    return reinterpret_cast<size_t>(created_element.ptr());
 								}
-												LibWeb: Add initial Rust HTML tree construction

Implement the first Rust tree builder pass around the tokenizer and the
LibWeb DOM host hooks. Cover the document setup, insertion-mode
dispatch, ordinary body insertion, basic table handling, active
formatting element reconstruction, and foreign-content routing.

Leave the C++ parser available at runtime so the new path can be tested
against the old implementation while the remaining tree-construction
algorithms are filled in.

											
										
										
											2026-05-15 20:39:06 +02:00
+								extern "C" void ladybird_html_parser_append_child(size_t parent, size_t child)
-												LibWeb: Add Rust HTML parser host plumbing

Add the C++ and Rust scaffolding that lets the tree builder live in
Rust while the DOM remains owned by LibWeb. Keep the exported surface
small: Rust stores parser state, and C++ provides node creation,
insertion, script, template, and GC hooks.

Route dump-html-tree through the selectable parser backend so the new
implementation can be exercised beside the existing parser while it is
being brought up.

											
										
										
											2026-05-15 20:29:23 +02:00
+								{
 								    MUST(node_from_html_parser_ffi(parent).append_child(node_from_html_parser_ffi(child)));
 								}
-												LibWeb: Complete Rust HTML tree construction

Finish the Rust implementation of the spec tree-construction algorithms
needed by the LibWeb test suite. Add the remaining table modes, foster
parenting, scope helpers, adoption agency handling, ruby/list/form and
select cases, frameset state, foreign-content edge cases, and parser
host callbacks.

Preserve behavior that depends on the C++ DOM integration, including
parser-created custom element reactions, fragment quirks mode, arbitrary
fragment namespaces, template fragment mode, fragment form ownership,
MathML annotation-xml boundaries, contextual fragment scripts, parser
script source positions, document.close() parser state, void-element
insertion, and duplicate attribute tracking.

Add focused tests for the parser edge cases that are easy to regress at
the boundary between the Rust tree builder and the C++ DOM host.

											
										
										
											2026-05-15 21:56:35 +02:00
+								extern "C" void ladybird_html_parser_insert_node(size_t parent, size_t before, size_t child, bool queue_custom_element_reactions)
 								{
 								    // Parser host callback: insert `child` under `parent`.
 								    // - `parent`, `before` and `child` are node handles resolved through node_from_html_parser_ffi().
 								    // - A `before` value of 0 means "append as the last child"; otherwise `child` is inserted before that node.
 								    // - When `queue_custom_element_reactions` is set and `child` is an element, the insertion runs with its own
 								    //   element queue, and the reactions collected in that queue are invoked right after the insertion.
 								    auto& parent_node = node_from_html_parser_ffi(parent);
 								    auto& child_node = node_from_html_parser_ffi(child);
 								    // Null when `child` is not a DOM::Element; the reaction handling below is skipped in that case.
 								    auto* child_element = as_if<DOM::Element>(child_node);
 								    // Push a fresh, empty element queue before touching the tree. It is popped again after the insertion.
 								    if (queue_custom_element_reactions && child_element)
 								        relevant_similar_origin_window_agent(*child_element).custom_element_reactions_stack.element_queue_stack.append({});
 								    if (!before) {
 								        MUST(parent_node.append_child(child_node));
 								    } else {
 								        auto& before_node = node_from_html_parser_ffi(before);
 								        // NOTE(review): the trailing `false` is presumably the "suppress observers" flag; confirm against DOM::Node::insert_before().
 								        parent_node.insert_before(child_node, &before_node, false);
 								    }
 								    // Pop the queue pushed above and run whatever reactions it holds.
 								    if (queue_custom_element_reactions && child_element) {
 								        auto queue = relevant_similar_origin_window_agent(*child_element).custom_element_reactions_stack.element_queue_stack.take_last();
 								        Bindings::invoke_custom_element_reactions(queue);
 								    }
 								}
 								extern "C" void ladybird_html_parser_move_all_children(size_t from, size_t to)
 								{
 								    // Parser host callback: detach every child of `from` and append it to `to`, in order.
 								    // Both arguments are node handles resolved through node_from_html_parser_ffi().
 								    auto& source = node_from_html_parser_ffi(from);
 								    auto& destination = node_from_html_parser_ffi(to);
 								    // Walk the vector returned by children_as_vector() while the children are being removed from `from`.
 								    for (auto& entry : source.children_as_vector()) {
 								        // Removal is expected to succeed; the detached node is then appended to the new parent.
 								        auto detached = source.remove_child(*entry).release_value();
 								        MUST(destination.append_child(detached));
 								    }
 								}
-												LibWeb: Add initial Rust HTML tree construction

Implement the first Rust tree builder pass around the tokenizer and the
LibWeb DOM host hooks. Cover the document setup, insertion-mode
dispatch, ordinary body insertion, basic table handling, active
formatting element reconstruction, and foreign-content routing.

Leave the C++ parser available at runtime so the new path can be tested
against the old implementation while the remaining tree-construction
algorithms are filled in.

											
										
										
											2026-05-15 20:39:06 +02:00
+								extern "C" size_t ladybird_html_parser_template_content(size_t element)
 								{
 								    // Parser host callback: return the handle of a <template> element's content.
 								    // `element` is a node handle; it is cast to HTMLTemplateElement with as<>(), so callers must pass a template element.
 								    auto& template_element = as<HTMLTemplateElement>(node_from_html_parser_ffi(element));
 								    // The handle handed back across the FFI is the raw pointer value of the content node.
 								    return reinterpret_cast<size_t>(template_element.content().ptr());
 								}
-												LibWeb: Align declarative shadow root parsing

Teach the Rust parser to recognize declarative shadow root templates and
pass the parsed mode, slot assignment, clonable, serializable, and
focus-delegation flags to the C++ DOM host.

Expose shadowRootSlotAssignment reflection with the spec-defined named
missing and invalid value defaults, and extend the ShadowDOM text test
coverage for the reflected property and parser-created shadow roots.

											
										
										
											2026-05-16 09:05:40 +02:00
+								extern "C" size_t ladybird_html_parser_attach_declarative_shadow_root(size_t host, RustFfiHtmlShadowRootMode mode, RustFfiHtmlSlotAssignmentMode slot_assignment, bool clonable, bool serializable, bool delegates_focus, bool keep_custom_element_registry_null)
-												LibWeb: Add initial Rust HTML tree construction

Implement the first Rust tree builder pass around the tokenizer and the
LibWeb DOM host hooks. Cover the document setup, insertion-mode
dispatch, ordinary body insertion, basic table handling, active
formatting element reconstruction, and foreign-content routing.

Leave the C++ parser available at runtime so the new path can be tested
against the old implementation while the remaining tree-construction
algorithms are filled in.

											
										
										
											2026-05-15 20:39:06 +02:00
+								{
 								    // Parser host callback: attach a declarative shadow root to the element behind `host`.
 								    // Returns the new shadow root's handle (its raw pointer value), or 0 when nothing was attached:
 								    // either the host already is a shadow host, or attach_a_shadow_root() reported an error.
 								    auto& host_element = as<DOM::Element>(node_from_html_parser_ffi(host));
 								    if (host_element.is_shadow_host())
 								        return 0;
 								    // The registry stays null when the caller asks for that; otherwise the host document's registry is used.
 								    GC::Ptr<CustomElementRegistry> registry;
 								    if (!keep_custom_element_registry_null)
 								        registry = host_element.document().custom_element_registry();
 								    // FFI enums are mapped onto the bindings enums: any mode other than Open becomes Closed,
 								    // and any slot assignment other than Manual becomes Named.
 								    auto result = host_element.attach_a_shadow_root(
 								        mode == RustFfiHtmlShadowRootMode::Open ? Bindings::ShadowRootMode::Open : Bindings::ShadowRootMode::Closed,
 								        clonable,
 								        serializable,
 								        delegates_focus,
-												LibWeb: Align declarative shadow root parsing

Teach the Rust parser to recognize declarative shadow root templates and
pass the parsed mode, slot assignment, clonable, serializable, and
focus-delegation flags to the C++ DOM host.

Expose shadowRootSlotAssignment reflection with the spec-defined named
missing and invalid value defaults, and extend the ShadowDOM text test
coverage for the reflected property and parser-created shadow roots.

											
										
										
											2026-05-16 09:05:40 +02:00
+								        slot_assignment == RustFfiHtmlSlotAssignmentMode::Manual ? Bindings::SlotAssignmentMode::Manual : Bindings::SlotAssignmentMode::Named,
-												LibWeb: Add initial Rust HTML tree construction

Implement the first Rust tree builder pass around the tokenizer and the
LibWeb DOM host hooks. Cover the document setup, insertion-mode
dispatch, ordinary body insertion, basic table handling, active
formatting element reconstruction, and foreign-content routing.

Leave the C++ parser available at runtime so the new path can be tested
against the old implementation while the remaining tree-construction
algorithms are filled in.

											
										
										
											2026-05-15 20:39:06 +02:00
+								        registry);
 								    // A failed attach is reported to the caller as handle 0 rather than propagated as an exception.
 								    if (result.is_error())
 								        return 0;
 								    auto shadow_root = host_element.shadow_root();
 								    VERIFY(shadow_root);
 								    // Flag the freshly attached root as declarative and available to element internals.
 								    shadow_root->set_declarative(true);
 								    shadow_root->set_available_to_element_internals(true);
 								    // Record on the shadow root itself that its registry is meant to stay null.
 								    if (keep_custom_element_registry_null)
 								        shadow_root->set_keep_custom_element_registry_null(true);
 								    return reinterpret_cast<size_t>(shadow_root.ptr());
 								}
-												LibWeb: Align declarative shadow root parsing

Teach the Rust parser to recognize declarative shadow root templates and
pass the parsed mode, slot assignment, clonable, serializable, and
focus-delegation flags to the C++ DOM host.

Expose shadowRootSlotAssignment reflection with the spec-defined named
missing and invalid value defaults, and extend the ShadowDOM text test
coverage for the reflected property and parser-created shadow roots.

											
										
										
											2026-05-16 09:05:40 +02:00
+								extern "C" void ladybird_html_parser_set_template_content(size_t element, size_t content)
 								{
 								    // Parser host callback: replace the template contents of `element` with `content`.
 								    // Both are node handles; as<>() casts them to HTMLTemplateElement and DOM::DocumentFragment respectively,
 								    // so callers must pass nodes of exactly those kinds.
 								    as<HTMLTemplateElement>(node_from_html_parser_ffi(element)).set_template_contents(as<DOM::DocumentFragment>(node_from_html_parser_ffi(content)));
 								}
 								extern "C" bool ladybird_html_parser_allows_declarative_shadow_roots(size_t node)
 								{
 								    // Parser host callback: report whether the document of the node behind `node`
 								    // allows declarative shadow roots.
 								    auto&& owning_document = node_from_html_parser_ffi(node).document();
 								    return owning_document.allow_declarative_shadow_roots();
 								}
-												LibWeb: Start building the tree building part of the new HTML parser

This patch adds a new HTMLDocumentParser class. It keeps a tokenizer
object internally and feeds itself with one token at a time from it.

The names and idioms in this class are expressed as closely to the
actual HTML parsing spec as possible, to make development as easy
and bug free as possible. :^)

This is going to become pretty large, but it's pretty cool!

											
										
										
											2020-05-24 00:14:23 +02:00
+								}