/* * Copyright (c) 2020-2025, Andreas Kling * Copyright (c) 2021, Luke Wilde * Copyright (c) 2023-2024, Shannon Booth * Copyright (c) 2025, Lorenz Ackermann * * SPDX-License-Identifier: BSD-2-Clause */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace Web::HTML { GC_DEFINE_ALLOCATOR(HTMLParser); GC_DEFINE_ALLOCATOR(HTMLParserEndState); static DOM::Node& node_from_html_parser_ffi(size_t); static HTMLParser& parser_from_html_parser_ffi(void*); static RustFfiHtmlNamespace namespace_to_html_parser_ffi(Optional const&); static RustFfiHtmlAttributeNamespace attribute_namespace_to_html_parser_ffi(Optional const&); static RustFfiHtmlQuirksMode quirks_mode_to_html_parser_ffi(DOM::QuirksMode); extern "C" void ladybird_html_parser_log_parse_error(void*, u8 const*, size_t); extern "C" void ladybird_html_parser_stop_parsing(void*); extern "C" bool ladybird_html_parser_parse_errors_enabled(); extern "C" void ladybird_html_parser_visit_node(void*, size_t); extern "C" size_t ladybird_html_parser_document_node(void*); extern "C" size_t ladybird_html_parser_document_html_element(void*); extern "C" void ladybird_html_parser_set_document_quirks_mode(void*, RustFfiHtmlQuirksMode); extern "C" size_t ladybird_html_parser_create_document_type(void*, u8 const*, size_t, u8 const*, size_t, u8 const*, size_t); extern "C" size_t ladybird_html_parser_create_comment(void*, u8 const*, size_t); extern "C" void ladybird_html_parser_insert_text(size_t, size_t, u8 const*, size_t); extern "C" void ladybird_html_parser_add_missing_attribute(size_t, u8 const*, size_t, u8 const*, size_t); extern "C" void 
ladybird_html_parser_remove_node(size_t);
extern "C" void ladybird_html_parser_handle_element_popped(size_t);
extern "C" void ladybird_html_parser_prepare_svg_script(void*, size_t, size_t);
extern "C" void ladybird_html_parser_set_script_source_line(void*, size_t, size_t);
extern "C" void ladybird_html_parser_mark_script_already_started(void*, size_t);
extern "C" size_t ladybird_html_parser_parent_node(size_t);
extern "C" size_t ladybird_html_parser_create_element(void*, size_t, RustFfiHtmlNamespace, u8 const*, size_t, u8 const*, size_t, RustFfiHtmlParserAttribute const*, size_t, bool, size_t, bool);
extern "C" void ladybird_html_parser_append_child(size_t, size_t);
extern "C" void ladybird_html_parser_insert_node(size_t, size_t, size_t, bool);
extern "C" void ladybird_html_parser_move_all_children(size_t, size_t);
extern "C" size_t ladybird_html_parser_template_content(size_t);
extern "C" size_t ladybird_html_parser_attach_declarative_shadow_root(size_t, RustFfiHtmlShadowRootMode, RustFfiHtmlSlotAssignmentMode, bool, bool, bool, bool);
extern "C" void ladybird_html_parser_set_template_content(size_t, size_t);
extern "C" bool ladybird_html_parser_allows_declarative_shadow_roots(size_t);

// Creates a parser for `document` whose tokenizer is constructed from `input`, `encoding` and `input_type`.
// Also creates the Rust-side parser state, registers this parser on the document, and stores the standardized
// form of `encoding` on the document. `encoding` must be a label that TextCodec can standardize (VERIFY'd below).
HTMLParser::HTMLParser(DOM::Document& document, ParserScriptingMode scripting_mode, StringView input, StringView encoding, HTMLTokenizer::InputType input_type)
    : m_tokenizer(input, encoding, input_type)
    , m_scripting_mode(scripting_mode)
    , m_document(document)
{
    m_rust_parser = rust_html_parser_create();
    m_document->set_parser({}, *this);
    auto standardized_encoding = TextCodec::get_standardized_encoding(encoding);
    VERIFY(standardized_encoding.has_value());
    m_document->set_encoding(MUST(String::from_utf8(standardized_encoding.value())));
}

// Creates a parser for `document` without supplying any tokenizer input, remembering whether the parser was
// script-created. Unlike the constructor above, the document's encoding is not touched here.
HTMLParser::HTMLParser(DOM::Document& document, ParserScriptingMode scripting_mode, ScriptCreatedParser script_created)
    : m_scripting_mode(scripting_mode)
    , m_script_created(script_created == ScriptCreatedParser::Yes)
    , m_document(document)
{
    m_rust_parser = rust_html_parser_create();
    m_document->set_parser({}, *this);
}

HTMLParser::~HTMLParser() = default;

// Destroys the Rust-side parser state (if any) and clears the handle so that it is destroyed at most once.
void HTMLParser::finalize()
{
    Base::finalize();
    if (m_rust_parser) {
        rust_html_parser_destroy(m_rust_parser);
        m_rust_parser = nullptr;
    }
}

// Visits the GC references held directly by this parser and then passes the visitor to the Rust-side parser state.
void HTMLParser::visit_edges(Cell::Visitor& visitor)
{
    Base::visit_edges(visitor);
    visitor.visit(m_document);
    visitor.visit(m_form_element);
    visitor.visit(m_context_element);
    visitor.visit(m_active_speculative_html_parser);
    rust_html_parser_visit_edges(m_rust_parser, &visitor);
}

void HTMLParser::initialize(JS::Realm& realm)
{
    Base::initialize(realm);
}

// Drives the Rust parser until it reports Ok or until the parser pause flag is set. Each time the Rust side
// returns with a pending (HTML or SVG) script, that script's end tag processing is performed here before the
// Rust parser is run again. The tokenizer is notified via parser_did_run() once the loop is left.
void HTMLParser::run(HTMLTokenizer::StopAtInsertionPoint stop_at_insertion_point)
{
    m_stop_parsing = false;

    for (;;) {
        // Script processing below may have set the pause flag; do not re-enter the Rust parser while it is set.
        if (m_parser_pause_flag)
            break;

        auto result = rust_html_parser_run_document(
            m_rust_parser,
            m_tokenizer.ffi_handle({}),
            this,
            m_scripting_mode != ParserScriptingMode::Disabled,
            stop_at_insertion_point == HTMLTokenizer::StopAtInsertionPoint::Yes);

        if (result == RustFfiHtmlParserRunResult::Ok)
            break;

        if (result == RustFfiHtmlParserRunResult::ExecuteScript) {
            auto script = rust_html_parser_take_pending_script(m_rust_parser);
            VERIFY(script);
            // The return value is not inspected here; the pause flag is re-checked at the top of the loop.
            process_script_end_tag_from_rust_parser(as(node_from_html_parser_ffi(script)));
            continue;
        }

        if (result == RustFfiHtmlParserRunResult::ExecuteSvgScript) {
            auto script = rust_html_parser_take_pending_svg_script(m_rust_parser);
            VERIFY(script);
            // A true return value means the parser is paused, so stop running.
            if (process_svg_script_end_tag_from_rust_parser(as(node_from_html_parser_ffi(script))))
                break;
            continue;
        }

        VERIFY_NOT_REACHED();
    }

    m_tokenizer.parser_did_run({});
}

// Sets the document's URL and source text (taken from the tokenizer), then parses until completion.
void HTMLParser::run(URL::URL const& url, HTMLTokenizer::StopAtInsertionPoint stop_at_insertion_point)
{
    m_document->set_url(url);
    m_document->set_source(m_tokenizer.source());
    run_until_completion(stop_at_insertion_point);
}

// Forwards to the Rust-side parser state.
void HTMLParser::pop_all_open_elements()
{
    rust_html_parser_pop_all_open_elements(m_rust_parser);
}

// Applies parser-specific setup to a freshly created element. Only HTML-namespace link and script elements are
// affected; every other element is left untouched.
void HTMLParser::configure_element_created_by_rust_parser(DOM::Element& element)
{
    if (element.local_name() == HTML::TagNames::link && element.namespace_uri() == Namespace::HTML) {
        // AD-HOC: Let link elements know which document they were originally parsed for.
        //         This is used for the render-blocking logic.
        auto& link_element = as(element);
        link_element.set_parser_document({}, document());
        link_element.set_was_enabled_when_created_by_parser({}, !element.has_attribute(HTML::AttributeNames::disabled));
        return;
    }

    if (element.local_name() != HTML::TagNames::script || element.namespace_uri() != Namespace::HTML)
        return;

    auto& script_element = as(element);
    // In the Fragment scripting mode the script element does not get a parser document.
    if (m_scripting_mode != ParserScriptingMode::Fragment)
        script_element.set_parser_document(Badge {}, document());
    script_element.set_force_async(Badge {}, false);
    // In the Inert scripting mode the script is marked as already started.
    if (m_scripting_mode == ParserScriptingMode::Inert)
        script_element.set_already_started(Badge {}, true);
}

// Creates the element for `token` via create_element_for(), applies the parser-specific configuration above,
// records the duplicate-attribute flag, and, when `form_element` is non-null and no template element is on the
// stack, associates qualifying form-associated elements with `form_element`.
GC::Ref HTMLParser::create_element_for_rust_parser(HTMLToken const& token, Optional const& namespace_, DOM::Node& intended_parent, bool had_duplicate_attribute, GC::Ptr form_element, bool has_template_element_on_stack)
{
    auto element = create_element_for(token, namespace_, intended_parent);
    configure_element_created_by_rust_parser(element);

    // AD-HOC: See AD-HOC comment on Element.m_had_duplicate_attribute_during_tokenization about why this is done.
    if (had_duplicate_attribute)
        element->set_had_duplicate_attribute_during_tokenization({});

    if (form_element && !has_template_element_on_stack) {
        auto* html_element = as_if(*element);
        // Form-associated custom elements are excluded from this association.
        if (html_element && html_element->is_form_associated_element() && !html_element->is_form_associated_custom_element()) {
            // Associate only if the element is not listed or has no form attribute, and the intended parent shares
            // a root with the form element.
            if ((!html_element->is_listed() || !html_element->has_attribute(HTML::AttributeNames::form))
                && &intended_parent.root() == &form_element->root()) {
                html_element->set_form(form_element.ptr());
                html_element->set_parser_inserted({});
            }
        }
    }

    return element;
}

// Performs the script end tag steps for an HTML script element handed back by the Rust parser.
// Returns true if the parser pause flag is set when this function returns.
bool HTMLParser::process_script_end_tag_from_rust_parser(HTMLScriptElement& script)
{
    // If the active speculative HTML parser is null and the JavaScript execution context stack is empty, then perform a microtask checkpoint.
    // The active speculative HTML parser is null here; start/stop are paired around the spin_until below.
    auto& vm = main_thread_event_loop().vm();
    if (!vm.has_running_execution_context())
        perform_a_microtask_checkpoint();

    // Let the old insertion point have the same value as the current insertion point.
    m_tokenizer.store_old_insertion_point();

    // Let the insertion point be just before the next input character.
    m_tokenizer.update_insertion_point();

    // Increment the parser's script nesting level by one.
    increment_script_nesting_level();

    // https://w3c.github.io/trusted-types/dist/spec/#setting-slot-values-from-parser
    // Set script’s script text value to its child text content.
    script.set_string_text(script.child_text_content());

    // If the active speculative HTML parser is null, then prepare the script element script.
    // This might cause some script to execute, which might cause new characters to be inserted into the tokenizer,
    // and might cause the tokenizer to output more tokens, resulting in a reentrant invocation of the parser.
    // The active speculative HTML parser is null here (see above).
    script.prepare_script(Badge {});

    // Decrement the parser's script nesting level by one.
    decrement_script_nesting_level();

    // If the parser's script nesting level is zero, then set the parser pause flag to false.
    if (script_nesting_level() == 0)
        m_parser_pause_flag = false;

    // Let the insertion point have the value of the old insertion point.
    m_tokenizer.restore_old_insertion_point();

    // At this stage, if the pending parsing-blocking script is not null, then:
    if (document().pending_parsing_blocking_script()) {
        // -> If the script nesting level is not zero:
        if (script_nesting_level() != 0) {
            // Set the parser pause flag to true,
            m_parser_pause_flag = true;
            // and abort the processing of any nested invocations of the tokenizer, yielding control back to the caller.
            // (Tokenization will resume when the caller returns to the "outer" tree construction stage.)
            return true;
        }

        // -> Otherwise:
        // The spec's "While the pending parsing-blocking script is not null" loop and the contained "spin the event
        // loop" step are implemented asynchronously: pause the parser, schedule a resume check, and yield back to
        // the caller. The remaining steps (4-13) run from resume_after_parser_blocking_script when the script is
        // ready.

        // 3. Start the speculative HTML parser for this instance of the HTML parser.
        start_the_speculative_html_parser();

        m_parser_pause_flag = true;
        schedule_resume_check();
    }

    return m_parser_pause_flag;
}

// Prepares an SVG script element before insertion: marks it parser-inserted (except in the Fragment scripting
// mode) and records its source line number.
void HTMLParser::prepare_svg_script_for_rust_parser(SVG::SVGScriptElement& script, size_t source_line_number)
{
    // AD-HOC: For SVG script elements, set the parser-inserted flag before the element is inserted into the DOM.
    //         Otherwise inserted()/attribute_changed() would invoke process_the_script_element() with the flag still unset
    //         and bypass the parser-blocking fetch handling.
    //
    // https://html.spec.whatwg.org/multipage/parsing.html#scripting-mode
    // The Fragment scripting mode treats parser-inserted scripts as if they were not parser-inserted, allowing, for
    // example, executing scripts when applying a fragment created by createContextualFragment().
    if (m_scripting_mode != ParserScriptingMode::Fragment)
        script.set_parser_inserted({});
    script.set_source_line_number({}, source_line_number);
}

// Records `source_line_number` on `element` if it is an HTML or SVG script element; does nothing otherwise.
void HTMLParser::set_script_source_line_from_rust_parser(DOM::Element& element, size_t source_line_number)
{
    if (auto* html_script_element = as_if(element)) {
        html_script_element->set_source_line_number({}, source_line_number);
        return;
    }
    if (auto* svg_script_element = as_if(element))
        svg_script_element->set_source_line_number({}, source_line_number);
}

// Marks `script` as already started.
void HTMLParser::mark_script_already_started_from_rust_parser(HTMLScriptElement& script)
{
    script.set_already_started(Badge {}, true);
}

// Entry point for the Rust parser to request stop_parsing().
void HTMLParser::stop_parsing_from_rust_parser()
{
    stop_parsing();
}

// Performs the script end tag steps for an SVG script element handed back by the Rust parser.
// Returns true if the parser pause flag is set when this function returns.
bool HTMLParser::process_svg_script_end_tag_from_rust_parser(SVG::SVGScriptElement& script)
{
    // Let the old insertion point have the same value as the current insertion point.
    m_tokenizer.store_old_insertion_point();

    // Let the insertion point be just before the next input character.
    m_tokenizer.update_insertion_point();

    // Increment the parser's script nesting level by one.
    increment_script_nesting_level();

    // Set the parser pause flag to true.
    m_parser_pause_flag = true;

    // If the active speculative HTML parser is null and the user agent supports SVG, then Process the SVG script element according to the SVG rules. [SVG]
    // The active speculative HTML parser is null here.
    script.process_the_script_element();

    // Decrement the parser's script nesting level by one.
    decrement_script_nesting_level();

    // If the parser's script nesting level is zero, then set the parser pause flag to false.
    if (script_nesting_level() == 0)
        m_parser_pause_flag = false;

    // Let the insertion point have the value of the old insertion point.
    m_tokenizer.restore_old_insertion_point();

    // If the SVG script registered itself as a pending parsing-blocking script (external fetch in flight),
    // pause the parser and schedule a resume check. The parser will resume from
    // resume_after_parser_blocking_script when the fetch completes.
    if (document().pending_parsing_blocking_svg_script()) {
        m_parser_pause_flag = true;
        schedule_resume_check();
    }

    return m_parser_pause_flag;
}

// Installs the_end() as the post-parse action, runs the parser, and invokes the action right away unless the
// parser ended up paused (in which case the action stays stored in m_post_parse_action).
void HTMLParser::run_until_completion(HTMLTokenizer::StopAtInsertionPoint stop_at_insertion_point)
{
    m_post_parse_action = [this] {
        the_end(*m_document, this);
    };

    run(stop_at_insertion_point);

    if (!m_parser_pause_flag)
        invoke_post_parse_action();
}

// https://html.spec.whatwg.org/multipage/parsing.html#the-end
void HTMLParser::the_end(GC::Ref document, GC::Ptr parser)
{
    // Once the user agent stops parsing the document, the user agent must run the following steps:

    // NOTE: This is a static method because the spec sometimes wants us to "act as if the user agent had stopped
    //       parsing document" which means running these steps without an HTML Parser. That makes it awkward to call,
    //       but it's preferable to duplicating so much code.

    if (parser)
        VERIFY(document == parser->m_document);

    // The entirety of "the end" should be a no-op for HTML fragment parsers, because:
    // - the temporary document is not accessible, making the DOMContentLoaded event and "ready for post load tasks" do
    //   nothing, making the parser not re-entrant from document.{open,write,close} and document.readyState inaccessible
    // - there is no Window associated with it and no associated browsing context with the temporary document (meaning
    //   the Window load event is skipped and making the load timing info inaccessible)
    // - scripts are not able to be prepared, meaning the script queues are empty.
// However, the unconditional "spin the event loop" invocations cause two issues: // - Microtask timing is changed, as "spin the event loop" performs an unconditional microtask checkpoint, causing // things to happen out of order. For example, YouTube sets the innerHTML of a "sv); } } } // 5. For each child node of the node, in tree order, run the following steps: actual_node->for_each_child([&](DOM::Node& current_node) { // 1. Let current node be the child node being processed. // 2. Append the appropriate string from the following list to s: if (is(current_node)) { // -> If current node is an Element auto& element = as(current_node); serialize_element(element); return IterationDecision::Continue; } if (is(current_node)) { // -> If current node is a Text node auto& text_node = as(current_node); auto* parent = current_node.parent(); if (is(parent)) { auto& parent_element = as(*parent); // If the parent of current node is a style, script, xmp, iframe, noembed, noframes, or plaintext element, // or if the parent of current node is a noscript element and scripting is enabled for the node, then append the value of current node's data IDL attribute literally. if (parent_element.local_name().is_one_of(HTML::TagNames::style, HTML::TagNames::script, HTML::TagNames::xmp, HTML::TagNames::iframe, HTML::TagNames::noembed, HTML::TagNames::noframes, HTML::TagNames::plaintext) || (parent_element.local_name() == HTML::TagNames::noscript && !parent_element.is_scripting_disabled())) { builder.append(text_node.data()); return IterationDecision::Continue; } } // Otherwise, append the value of current node's data IDL attribute, escaped as described below. builder.append(escape_string(text_node.data().utf16_view(), AttributeMode::No)); } if (is(current_node)) { // -> If current node is a Comment auto& comment_node = as(current_node); // Append the literal string "" (U+002D HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN). 
builder.append(""sv); return IterationDecision::Continue; } if (is(current_node)) { // -> If current node is a ProcessingInstruction auto& processing_instruction_node = as(current_node); // Append the literal string "). builder.append("'); return IterationDecision::Continue; } if (is(current_node)) { // -> If current node is a DocumentType auto& document_type_node = as(current_node); // Append the literal string "" (U+003E GREATER-THAN SIGN). builder.append("'); return IterationDecision::Continue; } return IterationDecision::Continue; }); // 6. Return s. return MUST(builder.to_string()); }
// NOTE(review): The line above is the tail of a definition that starts before this block; it is kept verbatim.

// https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#current-dimension-value
// Converts `value` into a style value: a percentage if the code point at `position` is '%', otherwise a pixel
// length rounded with CSSPixels::nearest_value_for().
static RefPtr parse_current_dimension_value(float value, Utf8View input, Utf8View::Iterator position)
{
    // 1. If position is past the end of input, then return value as a length.
    if (position == input.end())
        return CSS::LengthStyleValue::create(CSS::Length::make_px(CSSPixels::nearest_value_for(value)));

    // 2. If the code point at position within input is U+0025 (%), then return value as a percentage.
    if (*position == '%')
        return CSS::PercentageStyleValue::create(CSS::Percentage(value));

    // 3. Return value as a length.
    return CSS::LengthStyleValue::create(CSS::Length::make_px(CSSPixels::nearest_value_for(value)));
}

// https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#rules-for-parsing-dimension-values
// Parses a dimension attribute value into a length or percentage style value.
// Returns nullptr if `string` is not valid UTF-8 or does not start (after ASCII whitespace) with an ASCII digit.
RefPtr parse_dimension_value(StringView string)
{
    // 1. Let input be the string being parsed.
    auto input = Utf8View(string);
    if (!input.validate())
        return nullptr;

    // 2. Let position be a position variable for input, initially pointing at the start of input.
    auto position = input.begin();

    // 3. Skip ASCII whitespace within input given position.
    while (position != input.end() && Infra::is_ascii_whitespace(*position))
        ++position;

    // 4. If position is past the end of input or the code point at position within input is not an ASCII digit,
    //    then return failure.
    if (position == input.end() || !is_ascii_digit(*position))
        return nullptr;

    // 5. Collect a sequence of code points that are ASCII digits from input given position,
    //    and interpret the resulting sequence as a base-ten integer. Let value be that number.
    StringBuilder number_string;
    while (position != input.end() && is_ascii_digit(*position)) {
        number_string.append(*position);
        ++position;
    }
    // NOTE(review): integer_value is dereferenced without a has_value() check; this assumes converting a non-empty,
    //               digit-only string cannot fail -- TODO confirm for very long digit sequences.
    auto integer_value = number_string.string_view().to_number();
    // The integer part is capped at CSSPixels::max_dimension_value.
    float value = min(*integer_value, CSSPixels::max_dimension_value);

    // 6. If position is past the end of input, then return value as a length.
    if (position == input.end())
        return CSS::LengthStyleValue::create(CSS::Length::make_px(CSSPixels(value)));

    // 7. If the code point at position within input is U+002E (.), then:
    if (*position == '.') {
        // 1. Advance position by 1.
        ++position;

        // 2. If position is past the end of input or the code point at position within input is not an ASCII digit,
        //    then return the current dimension value with value, input, and position.
        if (position == input.end() || !is_ascii_digit(*position))
            return parse_current_dimension_value(value, input, position);

        // 3. Let divisor have the value 1.
        float divisor = 1;

        // 4. While true:
        while (true) {
            // 1. Multiply divisor by ten.
            divisor *= 10;

            // 2. Add the value of the code point at position within input,
            //    interpreted as a base-ten digit (0..9) and divided by divisor, to value.
            value += (*position - '0') / divisor;

            // 3. Advance position by 1.
            ++position;

            // 4. If position is past the end of input, then return value as a length.
            if (position == input.end())
                return CSS::LengthStyleValue::create(CSS::Length::make_px(CSSPixels::nearest_value_for(value)));

            // 5. If the code point at position within input is not an ASCII digit, then break.
            if (!is_ascii_digit(*position))
                break;
        }
    }

    // 8. Return the current dimension value with value, input, and position.
return parse_current_dimension_value(value, input, position); } // https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#rules-for-parsing-non-zero-dimension-values RefPtr parse_nonzero_dimension_value(StringView string) { // 1. Let input be the string being parsed. // 2. Let value be the result of parsing input using the rules for parsing dimension values. auto value = parse_dimension_value(string); // 3. If value is an error, return an error. if (!value) return nullptr; // 4. If value is zero, return an error. if (value->is_length() && value->as_length().raw_value() == 0) return nullptr; if (value->is_percentage() && value->as_percentage().percentage().value() == 0) return nullptr; // 5. If value is a percentage, return value as a percentage. // 6. Return value as a length. return value; } // https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#rules-for-parsing-a-legacy-colour-value Optional parse_legacy_color_value(StringView string_view) { // 1. If input is the empty string, then return failure. if (string_view.is_empty()) return {}; ByteString input = string_view; // 2. Strip leading and trailing ASCII whitespace from input. input = input.trim(Infra::ASCII_WHITESPACE); // 3. If input is an ASCII case-insensitive match for "transparent", then return failure. if (input.equals_ignoring_ascii_case("transparent"sv)) return {}; // 4. If input is an ASCII case-insensitive match for one of the named colors, then return the CSS color corresponding to that keyword. [CSSCOLOR] if (auto const color = Color::from_named_css_color_string(input); color.has_value()) return color; auto hex_nibble_to_u8 = [](char nibble) -> u8 { if (nibble >= '0' && nibble <= '9') return nibble - '0'; if (nibble >= 'a' && nibble <= 'f') return nibble - 'a' + 10; return nibble - 'A' + 10; }; // 5. 
If input's code point length is four, and the first character in input is U+0023 (#), and the last three characters of input are all ASCII hex digits, then: if (input.length() == 4 && input[0] == '#' && is_ascii_hex_digit(input[1]) && is_ascii_hex_digit(input[2]) && is_ascii_hex_digit(input[3])) { // 1. Let result be a CSS color. Color result; result.set_alpha(0xFF); // 2. Interpret the second character of input as a hexadecimal digit; let the red component of result be the resulting number multiplied by 17. result.set_red(hex_nibble_to_u8(input[1]) * 17); // 3. Interpret the third character of input as a hexadecimal digit; let the green component of result be the resulting number multiplied by 17. result.set_green(hex_nibble_to_u8(input[2]) * 17); // 4. Interpret the fourth character of input as a hexadecimal digit; let the blue component of result be the resulting number multiplied by 17. result.set_blue(hex_nibble_to_u8(input[3]) * 17); // 5. Return result. return result; } // 6. Replace any code points greater than U+FFFF in input (i.e., any characters that are not in the basic multilingual plane) with "00". auto replace_non_basic_multilingual_code_points = [](StringView string) -> ByteString { StringBuilder builder; for (auto code_point : Utf8View { string }) { if (code_point > 0xFFFF) builder.append("00"sv); else builder.append_code_point(code_point); } return builder.to_byte_string(); }; input = replace_non_basic_multilingual_code_points(input); // 7. If input's code point length is greater than 128, truncate input, leaving only the first 128 characters. if (input.length() > 128) input = input.substring(0, 128); // 8. If the first character in input is U+0023 (#), then remove it. if (input[0] == '#') input = input.substring(1); // 9. Replace any character in input that is not an ASCII hex digit with U+0030 (0). 
auto replace_non_ascii_hex = [](StringView string) -> ByteString { StringBuilder builder; for (auto code_point : Utf8View { string }) { if (is_ascii_hex_digit(code_point)) builder.append_code_point(code_point); else builder.append_code_point('0'); } return builder.to_byte_string(); }; input = replace_non_ascii_hex(input); // 10. While input's code point length is zero or not a multiple of three, append U+0030 (0) to input. StringBuilder builder; builder.append(input); while (builder.length() == 0 || (builder.length() % 3 != 0)) builder.append_code_point('0'); input = builder.to_byte_string(); // 11. Split input into three strings of equal code point length, to obtain three components. Let length be the code point length that all of those components have (one third the code point length of input). auto length = input.length() / 3; auto first_component = input.substring_view(0, length); auto second_component = input.substring_view(length, length); auto third_component = input.substring_view(length * 2, length); // 12. If length is greater than 8, then remove the leading length-8 characters in each component, and let length be 8. if (length > 8) { first_component = first_component.substring_view(length - 8); second_component = second_component.substring_view(length - 8); third_component = third_component.substring_view(length - 8); length = 8; } // 13. While length is greater than two and the first character in each component is U+0030 (0), remove that character and reduce length by one. while (length > 2 && first_component[0] == '0' && second_component[0] == '0' && third_component[0] == '0') { --length; first_component = first_component.substring_view(1); second_component = second_component.substring_view(1); third_component = third_component.substring_view(1); } // 14. If length is still greater than two, truncate each component, leaving only the first two characters in each. 
if (length > 2) { first_component = first_component.substring_view(0, 2); second_component = second_component.substring_view(0, 2); third_component = third_component.substring_view(0, 2); } auto to_hex = [&](StringView string) -> u8 { if (length == 1) { return hex_nibble_to_u8(string[0]); } auto nib1 = hex_nibble_to_u8(string[0]); auto nib2 = hex_nibble_to_u8(string[1]); return nib1 << 4 | nib2; }; // 15. Let result be a CSS color. Color result; result.set_alpha(0xFF); // 16. Interpret the first component as a hexadecimal number; let the red component of result be the resulting number. result.set_red(to_hex(first_component)); // 17. Interpret the second component as a hexadecimal number; let the green component of result be the resulting number. result.set_green(to_hex(second_component)); // 18. Interpret the third component as a hexadecimal number; let the blue component of result be the resulting number. result.set_blue(to_hex(third_component)); // 19. Return result. return result; } // https://html.spec.whatwg.org/multipage/rendering.html#tables-2 RefPtr parse_table_child_element_align_value(StringView string_view) { // The thead, tbody, tfoot, tr, td, and th elements, when they have an align attribute whose value is an ASCII // case-insensitive match for either the string "center" or the string "middle", are expected to center text within // themselves, as if they had their 'text-align' property set to 'center' in a presentational hint, and to align // descendants to the center. 
    if (string_view.equals_ignoring_ascii_case("center"sv) || string_view.equals_ignoring_ascii_case("middle"sv))
        return CSS::KeywordStyleValue::create(CSS::Keyword::LibwebCenter);

    // The thead, tbody, tfoot, tr, td, and th elements, when they have an align attribute whose value is an ASCII
    // case-insensitive match for the string "left", are expected to left-align text within themselves, as if they had
    // their 'text-align' property set to 'left' in a presentational hint, and to align descendants to the left.
    if (string_view.equals_ignoring_ascii_case("left"sv))
        return CSS::KeywordStyleValue::create(CSS::Keyword::LibwebLeft);

    // The thead, tbody, tfoot, tr, td, and th elements, when they have an align attribute whose value is an ASCII
    // case-insensitive match for the string "right", are expected to right-align text within themselves, as if they
    // had their 'text-align' property set to 'right' in a presentational hint, and to align descendants to the right.
    if (string_view.equals_ignoring_ascii_case("right"sv))
        return CSS::KeywordStyleValue::create(CSS::Keyword::LibwebRight);

    // The thead, tbody, tfoot, tr, td, and th elements, when they have an align attribute whose value is an ASCII
    // case-insensitive match for the string "justify", are expected to full-justify text within themselves, as if they
    // had their 'text-align' property set to 'justify' in a presentational hint, and to align descendants to the left.
    if (string_view.equals_ignoring_ascii_case("justify"sv))
        return CSS::KeywordStyleValue::create(CSS::Keyword::Justify);

    // Any other value maps to no presentational hint.
    return nullptr;
}

// The parser uses the realm of its document.
JS::Realm& HTMLParser::realm()
{
    return m_document->realm();
}

// https://html.spec.whatwg.org/multipage/parsing.html#start-the-speculative-html-parser
void HTMLParser::start_the_speculative_html_parser()
{
    // 1. Optionally, return.
    // NOTE: We do not opt out.

    // 2. If parser's active speculative HTML parser is not null, then stop the speculative HTML parser for parser.
    if (m_active_speculative_html_parser)
        stop_the_speculative_html_parser();

    // 3. Let speculativeParser be a new speculative HTML parser, with the same state as parser.
    // 4. Let speculativeDoc be a new isomorphic representation of parser's Document, where all elements are instead
    //    speculative mock elements. Let speculativeParser parse into speculativeDoc.
    // NOTE: The Rust preload scanner emits speculative fetch candidates directly, so we do not materialize a
    //       speculativeDoc tree or speculative mock elements.
    // The speculative parser is handed the tokenizer's not-yet-parsed input and the document's base URL.
    auto speculative_parser = SpeculativeHTMLParser::create(realm(), *m_document, m_tokenizer.unparsed_input(), m_document->base_url());

    // 5. Set parser's active speculative HTML parser to speculativeParser.
    m_active_speculative_html_parser = speculative_parser;

    // 6. In parallel, run speculativeParser until it is stopped or until it reaches the end of its input stream.
    speculative_parser->run();
}

// https://html.spec.whatwg.org/multipage/parsing.html#stop-the-speculative-html-parser
void HTMLParser::stop_the_speculative_html_parser()
{
    // 1. Let speculativeParser be parser's active speculative HTML parser.
    auto speculative_parser = m_active_speculative_html_parser;

    // 2. If speculativeParser is null, then return.
    if (!speculative_parser)
        return;

    // 3. Throw away any pending content in speculativeParser's input stream, and discard any future content that would
    //    have been added to it.
    speculative_parser->stop();

    // 4. Set parser's active speculative HTML parser to null.
    m_active_speculative_html_parser = nullptr;
}

// https://html.spec.whatwg.org/multipage/parsing.html#abort-a-parser
void HTMLParser::abort()
{
    // 1. Throw away any pending content in the input stream, and discard any future content that would have been added to it.
    m_tokenizer.abort();

    // 2. Stop the speculative HTML parser for this HTML parser.
    stop_the_speculative_html_parser();

    // 3. Update the current document readiness to "interactive".
    m_document->update_readiness(DocumentReadyState::Interactive);

    // 4. Pop all the nodes off the stack of open elements.
    pop_all_open_elements();

    // 5. Update the current document readiness to "complete".
    m_document->update_readiness(DocumentReadyState::Complete);

    // Remember that this parser was aborted.
    m_aborted = true;
}

// FFI callback: logs a parse error message when HTML_PARSER_DEBUG is enabled.
// The parser pointer is only validated (non-null VERIFY), not otherwise used.
extern "C" void ladybird_html_parser_log_parse_error(void* parser, u8 const* message_ptr, size_t message_len)
{
    (void)parser_from_html_parser_ffi(parser);
    dbgln_if(HTML_PARSER_DEBUG, "Rust parser parse error: {}", ffi_string_view(message_ptr, message_len));
}

// FFI callback: forwards a stop-parsing request to the HTMLParser behind `parser`.
extern "C" void ladybird_html_parser_stop_parsing(void* parser)
{
    parser_from_html_parser_ffi(parser).stop_parsing_from_rust_parser();
}

// FFI callback: reports the value of HTML_PARSER_DEBUG.
extern "C" bool ladybird_html_parser_parse_errors_enabled()
{
    return HTML_PARSER_DEBUG;
}

// FFI callback: visits the node behind the handle `node` with `visitor`; a handle of 0 is ignored.
extern "C" void ladybird_html_parser_visit_node(void* visitor, size_t node)
{
    if (node == 0)
        return;
    static_cast(visitor)->visit(node_from_html_parser_ffi(node));
}

// Maps an FFI element namespace to a namespace URI. For `Other`, the URI is taken from the given pointer/length
// pair, and a zero length yields an empty Optional.
static Optional namespace_from_html_parser_ffi(RustFfiHtmlNamespace namespace_, u8 const* namespace_uri_ptr, size_t namespace_uri_len)
{
    switch (namespace_) {
    case RustFfiHtmlNamespace::Html:
        return Namespace::HTML;
    case RustFfiHtmlNamespace::MathMl:
        return Namespace::MathML;
    case RustFfiHtmlNamespace::Svg:
        return Namespace::SVG;
    case RustFfiHtmlNamespace::Other:
        if (namespace_uri_len == 0)
            return {};
        return ffi_fly_string(namespace_uri_ptr, namespace_uri_len);
    }
    VERIFY_NOT_REACHED();
}

// Maps an FFI attribute namespace to a namespace URI; `None` yields an empty Optional and `Other` is not
// accepted on this path.
static Optional attribute_namespace_from_html_parser_ffi(RustFfiHtmlAttributeNamespace namespace_)
{
    switch (namespace_) {
    case RustFfiHtmlAttributeNamespace::None:
        return {};
    case RustFfiHtmlAttributeNamespace::XLink:
        return Namespace::XLink;
    case RustFfiHtmlAttributeNamespace::Xml:
        return Namespace::XML;
    case RustFfiHtmlAttributeNamespace::Xmlns:
        return Namespace::XMLNS;
    case RustFfiHtmlAttributeNamespace::Other:
        // Only fragment context attributes use this sentinel; parser-created attributes do not cross this path with
        // arbitrary namespace URIs.
        VERIFY_NOT_REACHED();
    }
    VERIFY_NOT_REACHED();
}

// Maps an attribute namespace URI to its FFI enum value: the three known namespaces map to their own values,
// any other present namespace maps to `Other`, and an absent namespace maps to `None`.
static RustFfiHtmlAttributeNamespace attribute_namespace_to_html_parser_ffi(Optional const& namespace_)
{
    if (namespace_ == Namespace::XLink)
        return RustFfiHtmlAttributeNamespace::XLink;
    if (namespace_ == Namespace::XML)
        return RustFfiHtmlAttributeNamespace::Xml;
    if (namespace_ == Namespace::XMLNS)
        return RustFfiHtmlAttributeNamespace::Xmlns;
    if (namespace_.has_value())
        return RustFfiHtmlAttributeNamespace::Other;
    return RustFfiHtmlAttributeNamespace::None;
}

// Maps an element namespace URI to its FFI enum value. Both an absent namespace and an unrecognized one map to
// `Other`.
static RustFfiHtmlNamespace namespace_to_html_parser_ffi(Optional const& namespace_)
{
    if (!namespace_.has_value())
        return RustFfiHtmlNamespace::Other;
    if (namespace_ == Namespace::HTML)
        return RustFfiHtmlNamespace::Html;
    if (namespace_ == Namespace::MathML)
        return RustFfiHtmlNamespace::MathMl;
    if (namespace_ == Namespace::SVG)
        return RustFfiHtmlNamespace::Svg;
    return RustFfiHtmlNamespace::Other;
}

// Converts an FFI quirks mode value to the DOM enum.
static DOM::QuirksMode quirks_mode_from_html_parser_ffi(RustFfiHtmlQuirksMode mode)
{
    switch (mode) {
    case RustFfiHtmlQuirksMode::No:
        return DOM::QuirksMode::No;
    case RustFfiHtmlQuirksMode::Limited:
        return DOM::QuirksMode::Limited;
    case RustFfiHtmlQuirksMode::Yes:
        return DOM::QuirksMode::Yes;
    }
    VERIFY_NOT_REACHED();
}

// Converts a DOM quirks mode value to the FFI enum.
static RustFfiHtmlQuirksMode quirks_mode_to_html_parser_ffi(DOM::QuirksMode mode)
{
    switch (mode) {
    case DOM::QuirksMode::No:
        return RustFfiHtmlQuirksMode::No;
    case DOM::QuirksMode::Limited:
        return RustFfiHtmlQuirksMode::Limited;
    case DOM::QuirksMode::Yes:
        return RustFfiHtmlQuirksMode::Yes;
    }
    VERIFY_NOT_REACHED();
}

// Recovers the HTMLParser from the opaque pointer passed across the FFI boundary; the pointer must be non-null.
static HTMLParser& parser_from_html_parser_ffi(void* parser)
{
    VERIFY(parser);
    return *reinterpret_cast(parser);
}

// Recovers a DOM node from an FFI node handle. Handles are node addresses carried as size_t; 0 is not a valid
// handle here (VERIFY'd).
static DOM::Node& node_from_html_parser_ffi(size_t node)
{
    VERIFY(node);
    return *reinterpret_cast(node);
}

// FFI callback: returns the handle of the parser's document.
extern "C" size_t ladybird_html_parser_document_node(void* parser)
{
    return reinterpret_cast(&parser_from_html_parser_ffi(parser).document());
}

// FFI callback: returns the handle of the document element, or 0 if there is none or it fails the type check
// below.
extern "C" size_t ladybird_html_parser_document_html_element(void* parser)
{
    auto* html_element = parser_from_html_parser_ffi(parser).document().document_element();
    if (!html_element || !is(*html_element))
        return 0;
    return reinterpret_cast(html_element);
}

// FFI callback: sets the document's quirks mode unless the document reports that the parser cannot change the
// mode.
extern "C" void ladybird_html_parser_set_document_quirks_mode(void* parser, RustFfiHtmlQuirksMode mode)
{
    auto& document = parser_from_html_parser_ffi(parser).document();
    if (!document.parser_cannot_change_the_mode())
        document.set_quirks_mode(quirks_mode_from_html_parser_ffi(mode));
}

// FFI callback: creates a document type node in the parser's document with the given name, public ID and system
// ID, and returns its handle. The node is not inserted anywhere here.
extern "C" size_t ladybird_html_parser_create_document_type(void* parser, u8 const* name_ptr, size_t name_len, u8 const* public_id_ptr, size_t public_id_len, u8 const* system_id_ptr, size_t system_id_len)
{
    auto& html_parser = parser_from_html_parser_ffi(parser);
    auto document_type = html_parser.document().realm().create(html_parser.document());
    document_type->set_name(ffi_string(name_ptr, name_len));
    document_type->set_public_id(ffi_string(public_id_ptr, public_id_len));
    document_type->set_system_id(ffi_string(system_id_ptr, system_id_len));
    return reinterpret_cast(document_type.ptr());
}

// FFI callback: creates a comment node in the parser's document holding the given UTF-8 data (converted to
// UTF-16) and returns its handle. The node is not inserted anywhere here.
extern "C" size_t ladybird_html_parser_create_comment(void* parser, u8 const* data_ptr, size_t data_len)
{
    auto& html_parser = parser_from_html_parser_ffi(parser);
    auto comment = html_parser.document().realm().create(html_parser.document(), Utf16String::from_utf8(ffi_string(data_ptr, data_len)));
    return reinterpret_cast(comment.ptr());
}

extern "C" void ladybird_html_parser_insert_text(size_t parent, size_t before, u8 const* data_ptr, size_t data_len)
{
    auto& parent_node = node_from_html_parser_ffi(parent);
    if (parent_node.is_document())
        return;

    auto data = Utf16String::from_utf8(ffi_string(data_ptr, data_len));
    if (before) {
        auto& before_node = node_from_html_parser_ffi(before);
        if (auto* previous_text = as_if(before_node.previous_sibling())) {
            (void)previous_text->append_data(data);
            return;
        }
        auto text = parent_node.document().realm().create(parent_node.document(), data);
        parent_node.insert_before(*text, &before_node);
return; } if (auto* last_text = as_if(parent_node.last_child())) { (void)last_text->append_data(data); return; } auto text = parent_node.document().realm().create(parent_node.document(), data); MUST(parent_node.append_child(*text)); } extern "C" void ladybird_html_parser_add_missing_attribute(size_t element, u8 const* local_name_ptr, size_t local_name_len, u8 const* value_ptr, size_t value_len) { auto& dom_element = as(node_from_html_parser_ffi(element)); auto local_name = ffi_fly_string(local_name_ptr, local_name_len); if (dom_element.has_attribute(local_name)) return; dom_element.append_attribute(local_name, ffi_string(value_ptr, value_len)); } extern "C" void ladybird_html_parser_remove_node(size_t node) { node_from_html_parser_ffi(node).remove(true); } extern "C" void ladybird_html_parser_handle_element_popped(size_t element) { // https://html.spec.whatwg.org/multipage/form-elements.html#the-option-element // When an option element is popped off the stack of open elements of an HTML parser or XML parser, // the user agent must run maybe clone an option into selectedcontent given the option element. // AD-HOC: The Rust tree builder flushes buffered text before invoking this hook, so the option's content is // up-to-date before cloning. 
if (auto* option_element = as_if(node_from_html_parser_ffi(element))) MUST(option_element->maybe_clone_into_selectedcontent()); } extern "C" void ladybird_html_parser_prepare_svg_script(void* parser, size_t element, size_t source_line_number) { parser_from_html_parser_ffi(parser).prepare_svg_script_for_rust_parser(as(node_from_html_parser_ffi(element)), source_line_number); } extern "C" void ladybird_html_parser_set_script_source_line(void* parser, size_t element, size_t source_line_number) { parser_from_html_parser_ffi(parser).set_script_source_line_from_rust_parser(as(node_from_html_parser_ffi(element)), source_line_number); } extern "C" void ladybird_html_parser_mark_script_already_started(void* parser, size_t element) { if (auto* script = as_if(node_from_html_parser_ffi(element))) parser_from_html_parser_ffi(parser).mark_script_already_started_from_rust_parser(*script); } extern "C" size_t ladybird_html_parser_parent_node(size_t node) { auto* parent = node_from_html_parser_ffi(node).parent(); return reinterpret_cast(parent); } extern "C" size_t ladybird_html_parser_create_element(void* parser, size_t intended_parent, RustFfiHtmlNamespace namespace_, u8 const* namespace_uri_ptr, size_t namespace_uri_len, u8 const* local_name_ptr, size_t local_name_len, RustFfiHtmlParserAttribute const* attributes, size_t attribute_count, bool had_duplicate_attribute, size_t form_element, bool has_template_element_on_stack) { auto& html_parser = parser_from_html_parser_ffi(parser); auto local_name = ffi_fly_string(local_name_ptr, local_name_len); auto token = HTMLToken::make_start_tag(local_name); for (size_t i = 0; i < attribute_count; ++i) { auto const& attribute = attributes[i]; Optional prefix; if (attribute.prefix_len != 0) prefix = ffi_fly_string(attribute.prefix_ptr, attribute.prefix_len); HTMLToken::Attribute token_attribute; token_attribute.prefix = move(prefix); token_attribute.local_name = ffi_fly_string(attribute.local_name_ptr, attribute.local_name_len); 
token_attribute.namespace_ = attribute_namespace_from_html_parser_ffi(attribute.namespace_); token_attribute.value = ffi_string(attribute.value_ptr, attribute.value_len); token.add_attribute(move(token_attribute)); } auto& intended_parent_node = node_from_html_parser_ffi(intended_parent); GC::Ptr form_element_ptr; if (form_element) form_element_ptr = as(node_from_html_parser_ffi(form_element)); auto element = html_parser.create_element_for_rust_parser(token, namespace_from_html_parser_ffi(namespace_, namespace_uri_ptr, namespace_uri_len), intended_parent_node, had_duplicate_attribute, form_element_ptr, has_template_element_on_stack); return reinterpret_cast(element.ptr()); } extern "C" void ladybird_html_parser_append_child(size_t parent, size_t child) { MUST(node_from_html_parser_ffi(parent).append_child(node_from_html_parser_ffi(child))); } extern "C" void ladybird_html_parser_insert_node(size_t parent, size_t before, size_t child, bool queue_custom_element_reactions) { auto& parent_node = node_from_html_parser_ffi(parent); auto& child_node = node_from_html_parser_ffi(child); auto* child_element = as_if(child_node); if (queue_custom_element_reactions && child_element) relevant_similar_origin_window_agent(*child_element).custom_element_reactions_stack.element_queue_stack.append({}); if (!before) { MUST(parent_node.append_child(child_node)); } else { auto& before_node = node_from_html_parser_ffi(before); parent_node.insert_before(child_node, &before_node, false); } if (queue_custom_element_reactions && child_element) { auto queue = relevant_similar_origin_window_agent(*child_element).custom_element_reactions_stack.element_queue_stack.take_last(); Bindings::invoke_custom_element_reactions(queue); } } extern "C" void ladybird_html_parser_move_all_children(size_t from, size_t to) { auto& from_node = node_from_html_parser_ffi(from); auto& to_node = node_from_html_parser_ffi(to); for (auto& child : from_node.children_as_vector()) 
MUST(to_node.append_child(from_node.remove_child(*child).release_value())); } extern "C" size_t ladybird_html_parser_template_content(size_t element) { auto& template_element = as(node_from_html_parser_ffi(element)); return reinterpret_cast(template_element.content().ptr()); } extern "C" size_t ladybird_html_parser_attach_declarative_shadow_root(size_t host, RustFfiHtmlShadowRootMode mode, RustFfiHtmlSlotAssignmentMode slot_assignment, bool clonable, bool serializable, bool delegates_focus, bool keep_custom_element_registry_null) { auto& host_element = as(node_from_html_parser_ffi(host)); if (host_element.is_shadow_host()) return 0; GC::Ptr registry; if (!keep_custom_element_registry_null) registry = host_element.document().custom_element_registry(); auto result = host_element.attach_a_shadow_root( mode == RustFfiHtmlShadowRootMode::Open ? Bindings::ShadowRootMode::Open : Bindings::ShadowRootMode::Closed, clonable, serializable, delegates_focus, slot_assignment == RustFfiHtmlSlotAssignmentMode::Manual ? Bindings::SlotAssignmentMode::Manual : Bindings::SlotAssignmentMode::Named, registry); if (result.is_error()) return 0; auto shadow_root = host_element.shadow_root(); VERIFY(shadow_root); shadow_root->set_declarative(true); shadow_root->set_available_to_element_internals(true); if (keep_custom_element_registry_null) shadow_root->set_keep_custom_element_registry_null(true); return reinterpret_cast(shadow_root.ptr()); } extern "C" void ladybird_html_parser_set_template_content(size_t element, size_t content) { as(node_from_html_parser_ffi(element)).set_template_contents(as(node_from_html_parser_ffi(content))); } extern "C" bool ladybird_html_parser_allows_declarative_shadow_roots(size_t node) { return node_from_html_parser_ffi(node).document().allow_declarative_shadow_roots(); } }