2021-07-05 05:20:31 +01:00
|
|
|
/*
|
|
|
|
|
* Copyright (c) 2021, Luke Wilde <lukew@serenityos.org>
|
|
|
|
|
*
|
|
|
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
|
|
|
*/
|
|
|
|
|
|
2022-08-28 13:42:07 +02:00
|
|
|
#include <LibWeb/Bindings/DOMParserPrototype.h>
|
|
|
|
|
#include <LibWeb/Bindings/MainThreadVM.h>
|
2024-01-04 12:38:36 +13:00
|
|
|
#include <LibWeb/DOM/XMLDocument.h>
|
2021-07-05 05:20:31 +01:00
|
|
|
#include <LibWeb/HTML/DOMParser.h>
|
2023-06-21 13:53:09 +02:00
|
|
|
#include <LibWeb/HTML/HTMLDocument.h>
|
2021-09-25 23:15:48 +02:00
|
|
|
#include <LibWeb/HTML/Parser/HTMLParser.h>
|
2022-08-31 18:39:32 +02:00
|
|
|
#include <LibWeb/HTML/Scripting/Environments.h>
|
2023-10-08 11:59:40 +02:00
|
|
|
#include <LibWeb/HTML/Window.h>
|
2025-08-11 00:31:46 +02:00
|
|
|
#include <LibWeb/TrustedTypes/RequireTrustedTypesForDirective.h>
|
|
|
|
|
#include <LibWeb/TrustedTypes/TrustedTypePolicy.h>
|
2022-03-28 16:25:17 +04:30
|
|
|
#include <LibWeb/XML/XMLDocumentBuilder.h>
|
2021-07-05 05:20:31 +01:00
|
|
|
|
|
|
|
|
namespace Web::HTML {
|
|
|
|
|
|
2024-11-15 04:01:23 +13:00
|
|
|
// Invokes the GC allocator-definition macro for DOMParser (the macro itself is declared outside this file).
GC_DEFINE_ALLOCATOR(DOMParser);
|
2023-11-19 19:47:52 +01:00
|
|
|
|
2024-11-15 04:01:23 +13:00
|
|
|
// Allocates a fresh DOMParser through the given realm and returns it to the caller.
// The body performs a single allocation and has no TRY / error-return path of its own.
WebIDL::ExceptionOr<GC::Ref<DOMParser>> DOMParser::construct_impl(JS::Realm& realm)
{
    return realm.create<DOMParser>(realm);
}
|
|
|
|
|
|
2022-09-25 16:38:21 -06:00
|
|
|
// Constructs a DOMParser by forwarding the realm to the PlatformObject base; the object has no other
// state to set up here.
DOMParser::DOMParser(JS::Realm& realm)
    : PlatformObject(realm)
{
}
|
|
|
|
|
|
2022-03-14 13:21:51 -06:00
|
|
|
// Out-of-line defaulted destructor; nothing needs to be released manually.
DOMParser::~DOMParser() = default;
|
2021-07-05 05:20:31 +01:00
|
|
|
|
2023-08-07 08:41:28 +02:00
|
|
|
// Runs the prototype-setup macro for the DOMParser interface, then defers to the base class
// initialization for the same realm.
void DOMParser::initialize(JS::Realm& realm)
{
    // NOTE: The macro is invoked before Base::initialize(); keep this order.
    WEB_SET_PROTOTYPE_FOR_INTERFACE(DOMParser);
    Base::initialize(realm);
}
|
|
|
|
|
|
2021-07-05 05:20:31 +01:00
|
|
|
// https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-domparser-parsefromstring
|
2025-08-11 00:31:46 +02:00
|
|
|
WebIDL::ExceptionOr<GC::Root<DOM::Document>> DOMParser::parse_from_string(Utf16String string, Bindings::DOMParserSupportedType type)
|
2021-07-05 05:20:31 +01:00
|
|
|
{
|
2025-08-11 00:31:46 +02:00
|
|
|
// 1. Let compliantString to the result of invoking the Get Trusted Type compliant string algorithm with
|
|
|
|
|
// TrustedHTML, this's relevant global object, string, "DOMParser parseFromString", and "script".
|
|
|
|
|
auto const compliant_string = TRY(TrustedTypes::get_trusted_type_compliant_string(
|
|
|
|
|
TrustedTypes::TrustedTypeName::TrustedHTML,
|
|
|
|
|
relevant_global_object(*this),
|
|
|
|
|
move(string),
|
2025-11-04 15:27:46 +00:00
|
|
|
TrustedTypes::InjectionSink::DOMParser_parseFromString,
|
2025-08-11 00:31:46 +02:00
|
|
|
TrustedTypes::Script.to_string()));
|
2024-06-25 20:55:58 +01:00
|
|
|
|
|
|
|
|
// 2. Let document be a new Document, whose content type is type and url is this's relevant global object's associated Document's URL.
|
2024-11-15 04:01:23 +13:00
|
|
|
GC::Ptr<DOM::Document> document;
|
2025-07-04 20:29:03 +01:00
|
|
|
auto& associated_document = as<HTML::Window>(relevant_global_object(*this)).associated_document();
|
2021-07-05 05:20:31 +01:00
|
|
|
|
2024-06-25 20:55:58 +01:00
|
|
|
// 3. Switch on type:
|
2022-02-19 20:01:20 +00:00
|
|
|
if (type == Bindings::DOMParserSupportedType::Text_Html) {
|
|
|
|
|
// -> "text/html"
|
2025-07-04 20:29:03 +01:00
|
|
|
document = HTML::HTMLDocument::create(realm(), associated_document.url());
|
2025-07-09 08:29:06 +03:00
|
|
|
document->set_content_type(Bindings::idl_enum_to_string(type));
|
|
|
|
|
document->set_document_type(DOM::Document::Type::HTML);
|
2022-02-19 20:01:20 +00:00
|
|
|
|
2025-08-11 00:31:46 +02:00
|
|
|
// 1. Parse HTML from a string given document and compliantString.
|
|
|
|
|
document->parse_html_from_a_string(compliant_string.to_utf8_but_should_be_ported_to_utf16());
|
2021-07-05 05:20:31 +01:00
|
|
|
} else {
|
2022-02-19 20:01:20 +00:00
|
|
|
// -> Otherwise
|
2025-07-22 02:14:34 +02:00
|
|
|
document = DOM::Document::create(realm(), associated_document.url());
|
2023-09-15 21:46:58 +12:00
|
|
|
document->set_content_type(Bindings::idl_enum_to_string(type));
|
2024-02-18 14:27:25 +00:00
|
|
|
document->set_document_type(DOM::Document::Type::XML);
|
2022-03-28 16:25:17 +04:30
|
|
|
|
|
|
|
|
// 1. Create an XML parser parse, associated with document, and with XML scripting support disabled.
|
2025-08-11 00:31:46 +02:00
|
|
|
auto const utf8_complaint_string = compliant_string.to_utf8_but_should_be_ported_to_utf16();
|
|
|
|
|
XML::Parser parser(utf8_complaint_string, { .resolve_external_resource = resolve_xml_resource });
|
2022-08-28 13:42:07 +02:00
|
|
|
XMLDocumentBuilder builder { *document, XMLScriptingSupport::Disabled };
|
2025-08-11 00:31:46 +02:00
|
|
|
// 2. Parse compliantString using parser.
|
2022-03-28 16:25:17 +04:30
|
|
|
auto result = parser.parse_with_listener(builder);
|
|
|
|
|
// 3. If the previous step resulted in an XML well-formedness or XML namespace well-formedness error, then:
|
|
|
|
|
if (result.is_error() || builder.has_error()) {
|
|
|
|
|
// NOTE: The XML parsing can produce nodes before it hits an error, just remove them.
|
|
|
|
|
// 1. Assert: document has no child nodes.
|
|
|
|
|
document->remove_all_children(true);
|
|
|
|
|
// 2. Let root be the result of creating an element given document, "parsererror", and "http://www.mozilla.org/newlayout/xml/parsererror.xml".
|
2023-11-04 09:46:23 +01:00
|
|
|
auto root = DOM::create_element(*document, "parsererror"_fly_string, "http://www.mozilla.org/newlayout/xml/parsererror.xml"_fly_string).release_value_but_fixme_should_propagate_errors();
|
2022-03-28 16:25:17 +04:30
|
|
|
// FIXME: 3. Optionally, add attributes or children to root to describe the nature of the parsing error.
|
|
|
|
|
// 4. Append root to document.
|
2022-10-30 17:50:04 +00:00
|
|
|
MUST(document->append_child(*root));
|
2022-03-28 16:25:17 +04:30
|
|
|
}
|
2021-07-05 05:20:31 +01:00
|
|
|
}
|
|
|
|
|
|
2025-07-04 20:29:03 +01:00
|
|
|
// AD-HOC: Setting the origin to match that of the associated document matches the behavior of existing browsers
|
|
|
|
|
// and avoids a crash, since we expect the origin to always be set.
|
|
|
|
|
// Spec issue: https://github.com/whatwg/html/issues/11429
|
|
|
|
|
document->set_origin(associated_document.origin());
|
|
|
|
|
|
2022-02-19 20:01:20 +00:00
|
|
|
// 3. Return document.
|
2025-08-11 00:31:46 +02:00
|
|
|
return document;
|
2021-07-05 05:20:31 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|