LibWeb: Implement XMLFragmentParser

Implement XMLFragmentParser based on the specification:
https://html.spec.whatwg.org/multipage/xhtml.html

Fixes one WPT in:
domparsing/insert_adjacent_html-xhtml.xhtml
This commit is contained in:
mikiubo 2025-09-11 00:27:13 +02:00 committed by Sam Atkins
parent 0b715b20a2
commit 5b2a71a712
Notes: github-actions[bot] 2025-10-23 10:08:05 +00:00
7 changed files with 267 additions and 2 deletions

View file

@ -1079,6 +1079,7 @@ set(SOURCES
XHR/XMLHttpRequestUpload.cpp
XLink/AttributeNames.cpp
XML/XMLDocumentBuilder.cpp
XML/XMLFragmentParser.cpp
XPath/XPath.cpp
XPath/XPathEvaluator.cpp
XPath/XPathExpression.cpp

View file

@ -92,6 +92,7 @@
#include <LibWeb/WebIDL/AbstractOperations.h>
#include <LibWeb/WebIDL/DOMException.h>
#include <LibWeb/WebIDL/ExceptionOr.h>
#include <LibWeb/XML/XMLFragmentParser.h>
namespace Web::DOM {
@ -2054,9 +2055,9 @@ WebIDL::ExceptionOr<GC::Ref<DOM::DocumentFragment>> Element::parse_fragment(Stri
// 1. Let algorithm be the HTML fragment parsing algorithm.
auto algorithm = HTML::HTMLParser::parse_html_fragment;
// FIXME: 2. If context's node document is an XML document, then set algorithm to the XML fragment parsing algorithm.
// 2. If context's node document is an XML document, then set algorithm to the XML fragment parsing algorithm.
if (document().is_xml_document()) {
dbgln("FIXME: Handle fragment parsing of XML documents");
algorithm = XMLFragmentParser::parse_xml_fragment;
}
// 3. Let newChildren be the result of invoking algorithm given context and markup.

View file

@ -0,0 +1,83 @@
/*
* Copyright (c) 2025, mikiubo <michele.uboldi@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include "XMLFragmentParser.h"
#include <LibWeb/DOM/Document.h>
#include <LibWeb/DOM/Element.h>
#include <LibWeb/HTML/Parser/HTMLParser.h>
#include <LibWeb/WebIDL/DOMException.h>
#include <LibWeb/XML/XMLDocumentBuilder.h>
#include <LibXML/Parser/Parser.h>
namespace Web {
// https://html.spec.whatwg.org/multipage/xhtml.html#parsing-xhtml-fragments
//
// Parses `input` as XML markup in the context of `context` by wrapping it in a synthetic start/end tag pair
// named after the context element and parsing the result into a fresh XML document with scripting disabled.
//
// Returns the children of the resulting document element, in tree order.
// Throws a "SyntaxError" DOMException if the markup cannot be parsed, if the DOM tree could not be built,
// or if the resulting document element has sibling nodes.
WebIDL::ExceptionOr<Vector<GC::Root<DOM::Node>>> XMLFragmentParser::parse_xml_fragment(DOM::Element& context, StringView input, HTML::HTMLParser::AllowDeclarativeShadowRoots allow_declarative_shadow_roots)
{
    // 1. Create a new XML parser.
    // NB: The parser is created below, once the whole string to feed it has been assembled.
    StringBuilder feed;

    // Build the context element's qualified name ("prefix:local" or just "local").
    StringBuilder qualified_name_builder;
    if (auto const& prefix = context.prefix(); prefix.has_value() && !prefix->is_empty()) {
        qualified_name_builder.append(prefix.value());
        qualified_name_builder.append(':');
    }
    qualified_name_builder.append(context.local_name());
    // NB: This view borrows from qualified_name_builder, which must not be modified from here on.
    auto const qualified_name = qualified_name_builder.string_view();

    // 2. Feed the parser just created the string corresponding to the start tag of context,
    feed.append('<');
    feed.append(qualified_name);
    feed.append('>');
    // FIXME: declaring all the namespace prefixes that are in scope on that element in the DOM,
    // FIXME: as well as declaring the default namespace (if any) that is in scope on that element in the DOM.
    //    A namespace prefix is in scope if the DOM lookupNamespaceURI() method on the element would return a non-null value for that prefix.
    //    The default namespace is the namespace for which the DOM isDefaultNamespace() method on the element would return true.

    // 3. Feed the parser just created the string input.
    feed.append(input);

    // 4. Feed the parser just created the string corresponding to the end tag of context.
    feed.append("</"sv);
    feed.append(qualified_name);
    feed.append(">"sv);

    // The fragment is parsed into its own throwaway XML document; only its nodes are handed back.
    GC::Ptr<DOM::Document> document = DOM::Document::create(context.realm());
    document->set_document_type(DOM::Document::Type::XML);
    if (allow_declarative_shadow_roots == HTML::HTMLParser::AllowDeclarativeShadowRoots::Yes)
        document->set_allow_declarative_shadow_roots(true);

    XML::Parser parser(feed.string_view());
    // Scripting is disabled so that <script> elements in the fragment are created but never executed here.
    XMLDocumentBuilder builder { *document, XMLScriptingSupport::Disabled };
    auto result = parser.parse_with_listener(builder);

    // 5. If there is an XML well-formedness or XML namespace well-formedness error, then throw a "SyntaxError" DOMException.
    if (result.is_error())
        return WebIDL::SyntaxError::create(context.realm(), Utf16String::formatted("{}", result.error()));

    // NB: The document builder tracks its own error state independently of the parser's result,
    //     so a successful parse alone does not guarantee that a usable DOM tree was produced.
    if (builder.has_error())
        return WebIDL::SyntaxError::create(context.realm(), "Failed to build a DOM tree from the XML fragment"_utf16);

    // NB: Guard against a missing document element instead of dereferencing a null pointer below.
    auto* doc_element = document->document_element();
    if (!doc_element)
        return WebIDL::SyntaxError::create(context.realm(), "XML fragment did not produce a document element"_utf16);

    // 6. If the document element of the resulting Document has any sibling nodes, then throw a "SyntaxError" DOMException.
    if (doc_element->previous_sibling() || doc_element->next_sibling())
        return WebIDL::SyntaxError::create(context.realm(), "Document element has sibling nodes"_utf16);

    // 7. Return the resulting Document node's document element's children, in tree order.
    Vector<GC::Root<DOM::Node>> result_nodes;
    for (auto* child = doc_element->first_child(); child; child = child->next_sibling())
        result_nodes.append(*child);

    return result_nodes;
}
}

View file

@ -0,0 +1,21 @@
/*
* Copyright (c) 2025, mikiubo <michele.uboldi@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <LibWeb/DOM/Document.h>
#include <LibWeb/DOM/Element.h>
#include <LibWeb/HTML/Parser/HTMLParser.h>
#include <LibWeb/XML/XMLDocumentBuilder.h>
namespace Web {
// Entry point for parsing a string of XML markup as a fragment in the context of an element.
// The class only exposes a static function and carries no state of its own.
class XMLFragmentParser final {
public:
// Parses `markup` in the context of `context` and returns the resulting nodes,
// or an exception if parsing fails.
// The last parameter controls whether declarative shadow roots are allowed; it defaults to No.
static WebIDL::ExceptionOr<Vector<GC::Root<DOM::Node>>> parse_xml_fragment(DOM::Element& context, StringView markup, HTML::HTMLParser::AllowDeclarativeShadowRoots = HTML::HTMLParser::AllowDeclarativeShadowRoots::No);
};
}

View file

@ -0,0 +1,35 @@
Harness status: OK
Found 30 tests
30 Pass
Pass beforeBegin content without next sibling
Pass Afterbegin content without next sibling
Pass BeforeEnd content without next sibling
Pass afterend content without next sibling
Pass beforeBegin content again, with next sibling
Pass Afterbegin content again, with next sibling
Pass BeforeEnd content again, with next sibling
Pass afterend content again, with next sibling
Pass Should throw when inserting with invalid position string
Pass When the parent node is null, insertAdjacentHTML should throw for beforebegin and afterend (text)
Pass When the parent node is null, insertAdjacentHTML should throw for beforebegin and afterend (comments)
Pass When the parent node is null, insertAdjacentHTML should throw for beforebegin and afterend (elements)
Pass When the parent node is a document, insertAdjacentHTML should throw for beforebegin and afterend (text)
Pass When the parent node is a document, insertAdjacentHTML should throw for beforebegin and afterend (comments)
Pass When the parent node is a document, insertAdjacentHTML should throw for beforebegin and afterend (elements)
Pass Inserting after being and before end should order things correctly
Pass beforeBegin child node not in tree but has parent
Pass Afterbegin child node not in tree but has parent
Pass BeforeEnd child node not in tree but has parent
Pass afterend child node not in tree but has parent
Pass Should not run script when appending things which have descendant <script> inserted via insertAdjacentHTML
Pass beforeBegin content2 without next sibling
Pass Afterbegin content2 without next sibling
Pass BeforeEnd content2 without next sibling
Pass afterend content2 without next sibling
Pass beforeBegin content2 test again, now that there's a next sibling
Pass Afterbegin content2 test again, now that there's a next sibling
Pass BeforeEnd content2 test again, now that there's a next sibling
Pass afterend content2 test again, now that there's a next sibling
Pass insertAdjacentHTML in HTML

View file

@ -0,0 +1,91 @@
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>insertAdjacentHTML in HTML</title>
<script src="../resources/testharness.js"></script>
<script src="../resources/testharnessreport.js"></script>
<script src="insert_adjacent_html.js"></script>
</head>
<body>
<p id="display"></p><div id="content" style="display: none"></div><div id="content2" style="display: none"></div>
<script><![CDATA[
var script_ran = false;
// Registers four tests, one per insertAdjacentHTML position, against `node`.
// Each test inserts markup containing an inline <script> plus one marker element,
// checks that both ended up at the expected place relative to `node`, and checks
// that the inserted script did not run (it would set the global `script_ran`).
// `testDesc` is appended to the test name after the position and the node's id.
// Note that the position strings are passed in mixed case ("beforeBegin",
// "Afterbegin", "BeforeEnd") as well as lower case ("afterend").
// "\u003C" is the JavaScript escape for "<".
function testPositions(node, testDesc) {
// beforebegin: the markup becomes the preceding siblings of node (<script>, then <i>).
test(function() {
script_ran = false;
node.insertAdjacentHTML("beforeBegin", "\u003Cscript>script_ran = true;\u003C/script><i></i>");
assert_equals(node.previousSibling.localName, "i", "Should have had <i> as previous sibling");
assert_equals(node.previousSibling.previousSibling.localName, "script", "Should have had <script> as second previous child");
assert_false(script_ran, "script should not have run");
}, "beforeBegin " + node.id + " " + testDesc)
// afterbegin: the markup becomes the first children of node (<b>, then <script>).
test(function() {
script_ran = false;
node.insertAdjacentHTML("Afterbegin", "<b></b>\u003Cscript>script_ran = true;\u003C/script>");
assert_equals(node.firstChild.localName, "b", "Should have had <b> as first child");
assert_equals(node.firstChild.nextSibling.localName, "script", "Should have had <script> as second child");
assert_false(script_ran, "script should not have run");
}, "Afterbegin " + node.id + " " + testDesc);
// beforeend: the markup becomes the last children of node (<script>, then <u>).
test(function() {
script_ran = false;
node.insertAdjacentHTML("BeforeEnd", "\u003Cscript>script_ran = true;\u003C/script><u></u>");
assert_equals(node.lastChild.localName, "u", "Should have had <u> as last child");
assert_equals(node.lastChild.previousSibling.localName, "script", "Should have had <script> as penultimate child");
assert_false(script_ran, "script should not have run");
}, "BeforeEnd " + node.id + " " + testDesc)
// afterend: the markup becomes the following siblings of node (<a>, then <script>).
test(function() {
script_ran = false;
node.insertAdjacentHTML("afterend", "<a></a>\u003Cscript>script_ran = true;\u003C/script>");
assert_equals(node.nextSibling.localName, "a", "Should have had <a> as next sibling");
assert_equals(node.nextSibling.nextSibling.localName, "script", "Should have had <script> as second next sibling");
assert_false(script_ran, "script should not have run");
}, "afterend " + node.id + " " + testDesc)
}
// Run the position tests twice on #content: the first pass inserts siblings and children,
// so the second pass runs against a node that already has a next sibling.
var content = document.getElementById("content");
testPositions(content, "without next sibling");
testPositions(content, "again, with next sibling");
// Unknown position strings must be rejected with a SyntaxError.
// The last two use the non-ASCII letters "İ" and "ı" in place of "i"
// (presumably to check that position matching does not treat them as "i").
test(function() {
assert_throws_dom("SYNTAX_ERR", function() {content.insertAdjacentHTML("bar", "foo")});
assert_throws_dom("SYNTAX_ERR", function() {content.insertAdjacentHTML("beforebegİn", "foo")});
assert_throws_dom("SYNTAX_ERR", function() {content.insertAdjacentHTML("beforebegın", "foo")});
}, "Should throw when inserting with invalid position string");
// `child` starts out detached (no parent); `parentElement` is also not yet in the document.
var parentElement = document.createElement("div");
var child = document.createElement("div");
child.id = "child";
// testThrowingNoParent comes from insert_adjacent_html.js, which is loaded in <head>.
testThrowingNoParent(child, "null");
testThrowingNoParent(document.documentElement, "a document");
// afterbegin and beforeend need no parent, so they work on the detached child.
// The child is attached to parentElement at the end of this test.
test(function() {
child.insertAdjacentHTML("afterBegin", "foo");
child.insertAdjacentHTML("beforeend", "bar");
assert_equals(child.textContent, "foobar");
parentElement.appendChild(child);
}, "Inserting after being and before end should order things correctly");
// child now has a parent (parentElement), which itself is not yet in the document.
testPositions(child, "node not in tree but has parent");
// Attaching the subtree that holds the previously inserted <script> elements must not execute them.
test(function() {
script_ran = false;
content.appendChild(parentElement); // must not run scripts
assert_false(script_ran, "script should not have run");
}, "Should not run script when appending things which have descendant <script> inserted via insertAdjacentHTML");
var content2 = document.getElementById("content2");
testPositions(content2, "without next sibling");
testPositions(content2, "test again, now that there's a next sibling");
// XML-only:
// An unclosed <p> is not well-formed XML, so parsing the fragment must throw a SyntaxError.
// NOTE: this test is registered without a name.
test(function() {
assert_throws_dom("SYNTAX_ERR", function() {content.insertAdjacentHTML("beforeend", "<p>")});
});
]]></script>
<div id="log"></div>
</body>
</html>

View file

@ -0,0 +1,33 @@
/**
 * Registers three tests asserting that insertAdjacentHTML() at the "afterend" and
 * "beforebegin" positions throws NO_MODIFICATION_ALLOWED_ERR on `element`,
 * for text, comment and element markup respectively.
 *
 * @param element the element insertAdjacentHTML() is called on
 * @param desc    description of the element's parent, embedded in each test name
 */
function testThrowingNoParent(element, desc) {
  // Positions are always tried in this order for every piece of markup.
  var siblingPositions = ["afterend", "beforebegin"];

  // Asserts that inserting `markup` throws at each sibling position.
  function expectNoModificationAllowed(markup) {
    siblingPositions.forEach(function(position) {
      assert_throws_dom("NO_MODIFICATION_ALLOWED_ERR", function() {
        element.insertAdjacentHTML(position, markup);
      });
    });
  }

  test(function() {
    expectNoModificationAllowed("");
    expectNoModificationAllowed("foo");
  }, "When the parent node is " + desc + ", insertAdjacentHTML should throw for beforebegin and afterend (text)");

  test(function() {
    expectNoModificationAllowed("<!-- fail -->");
  }, "When the parent node is " + desc + ", insertAdjacentHTML should throw for beforebegin and afterend (comments)");

  test(function() {
    expectNoModificationAllowed("<div></div>");
  }, "When the parent node is " + desc + ", insertAdjacentHTML should throw for beforebegin and afterend (elements)");
}