2021-07-05 05:20:31 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								/*
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 * Copyright (c) 2021, Luke Wilde <lukew@serenityos.org>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 *
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 * SPDX-License-Identifier: BSD-2-Clause
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-08-28 13:42:07 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								#include <LibWeb/Bindings/DOMParserPrototype.h>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#include <LibWeb/Bindings/MainThreadVM.h>
							 | 
						
					
						
							
								
									
										
										
										
											2024-01-04 12:38:36 +13:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								#include <LibWeb/DOM/XMLDocument.h>
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-05 05:20:31 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#include <LibWeb/HTML/DOMParser.h>
							 | 
						
					
						
							
								
									
										
										
										
											2023-06-21 13:53:09 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								#include <LibWeb/HTML/HTMLDocument.h>
							 | 
						
					
						
							
								
									
										
										
										
											2021-09-25 23:15:48 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								#include <LibWeb/HTML/Parser/HTMLParser.h>
							 | 
						
					
						
							
								
									
										
										
										
											2022-08-31 18:39:32 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								#include <LibWeb/HTML/Scripting/Environments.h>
							 | 
						
					
						
							
								
									
										
										
										
											2023-10-08 11:59:40 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								#include <LibWeb/HTML/Window.h>
							 | 
						
					
						
							
								
									
										
										
										
											2022-03-28 16:25:17 +04:30
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								#include <LibWeb/XML/XMLDocumentBuilder.h>
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-05 05:20:31 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								namespace Web::HTML {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-11-19 19:47:52 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								JS_DEFINE_ALLOCATOR(DOMParser);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-09-25 16:38:21 -06:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								WebIDL::ExceptionOr<JS::NonnullGCPtr<DOMParser>> DOMParser::construct_impl(JS::Realm& realm)
							 | 
						
					
						
							
								
									
										
										
										
											2022-08-31 18:39:32 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								{
							 | 
						
					
						
							
								
									
										
										
										
											2023-08-13 13:05:26 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    return realm.heap().allocate<DOMParser>(realm, realm);
							 | 
						
					
						
							
								
									
										
										
										
											2022-08-31 18:39:32 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-09-25 16:38:21 -06:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								DOMParser::DOMParser(JS::Realm& realm)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    : PlatformObject(realm)
							 | 
						
					
						
							
								
									
										
										
										
											2022-08-28 13:42:07 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								{
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-03-14 13:21:51 -06:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								DOMParser::~DOMParser() = default;
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-05 05:20:31 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-08-07 08:41:28 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								void DOMParser::initialize(JS::Realm& realm)
							 | 
						
					
						
							
								
									
										
										
										
											2023-01-10 06:28:20 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								{
							 | 
						
					
						
							
								
									
										
										
										
											2023-08-07 08:41:28 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    Base::initialize(realm);
							 | 
						
					
						
							
								
									
										
										
										
											2023-11-22 12:55:21 +13:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    set_prototype(&Bindings::ensure_web_prototype<Bindings::DOMParserPrototype>(realm, "DOMParser"_fly_string));
							 | 
						
					
						
							
								
									
										
										
										
											2023-01-10 06:28:20 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-05 05:20:31 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								// https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-domparser-parsefromstring
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-06 14:23:00 +12:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								JS::NonnullGCPtr<DOM::Document> DOMParser::parse_from_string(StringView string, Bindings::DOMParserSupportedType type)
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-05 05:20:31 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								{
							 | 
						
					
						
							
								
									
										
										
										
											2022-02-19 20:01:20 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    // 1. Let document be a new Document, whose content type is type and url is this's relevant global object's associated Document's URL.
							 | 
						
					
						
							
								
									
										
										
										
											2023-06-21 13:53:09 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    JS::GCPtr<DOM::Document> document;
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-05 05:20:31 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-02-19 20:01:20 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    // 2. Switch on type:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    if (type == Bindings::DOMParserSupportedType::Text_Html) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        // -> "text/html"
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-03 21:44:00 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        // 1. Set document's type to "html".
							 | 
						
					
						
							
								
									
										
										
										
											2023-08-13 13:05:26 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        document = HTML::HTMLDocument::create(realm(), verify_cast<HTML::Window>(relevant_global_object(*this)).associated_document().url());
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-15 21:46:58 +12:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        document->set_content_type(Bindings::idl_enum_to_string(type));
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-03 21:44:00 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        document->set_document_type(DOM::Document::Type::HTML);
							 | 
						
					
						
							
								
									
										
										
										
											2022-02-19 20:01:20 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        // 2. Create an HTML parser parser, associated with document.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        // 3. Place string into the input stream for parser. The encoding confidence is irrelevant.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        // FIXME: We don't have the concept of encoding confidence yet.
							 | 
						
					
						
							
								
									
										
										
										
											2022-08-28 13:42:07 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        auto parser = HTMLParser::create(*document, string, "UTF-8");
							 | 
						
					
						
							
								
									
										
										
										
											2022-02-19 20:01:20 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        // 4. Start parser and let it run until it has consumed all the characters just inserted into the input stream.
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-05 05:20:31 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        // FIXME: This is to match the default URL. Instead, pass in this's relevant global object's associated Document's URL.
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-11 17:32:29 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        parser->run("about:blank"sv);
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-05 05:20:31 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    } else {
							 | 
						
					
						
							
								
									
										
										
										
											2022-02-19 20:01:20 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        // -> Otherwise
							 | 
						
					
						
							
								
									
										
										
										
											2024-01-04 12:38:36 +13:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        document = DOM::XMLDocument::create(realm(), verify_cast<HTML::Window>(relevant_global_object(*this)).associated_document().url());
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-15 21:46:58 +12:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        document->set_content_type(Bindings::idl_enum_to_string(type));
							 | 
						
					
						
							
								
									
										
										
										
											2024-02-18 14:27:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        document->set_document_type(DOM::Document::Type::XML);
							 | 
						
					
						
							
								
									
										
										
										
											2022-03-28 16:25:17 +04:30
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        // 1. Create an XML parser parse, associated with document, and with XML scripting support disabled.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        XML::Parser parser(string, { .resolve_external_resource = resolve_xml_resource });
							 | 
						
					
						
							
								
									
										
										
										
											2022-08-28 13:42:07 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        XMLDocumentBuilder builder { *document, XMLScriptingSupport::Disabled };
							 | 
						
					
						
							
								
									
										
										
										
											2022-03-28 16:25:17 +04:30
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        // 2. Parse string using parser.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        auto result = parser.parse_with_listener(builder);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        // 3. If the previous step resulted in an XML well-formedness or XML namespace well-formedness error, then:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if (result.is_error() || builder.has_error()) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            // NOTE: The XML parsing can produce nodes before it hits an error, just remove them.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            // 1. Assert: document has no child nodes.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            document->remove_all_children(true);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            // 2. Let root be the result of creating an element given document, "parsererror", and "http://www.mozilla.org/newlayout/xml/parsererror.xml".
							 | 
						
					
						
							
								
									
										
										
										
											2023-11-04 09:46:23 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            auto root = DOM::create_element(*document, "parsererror"_fly_string, "http://www.mozilla.org/newlayout/xml/parsererror.xml"_fly_string).release_value_but_fixme_should_propagate_errors();
							 | 
						
					
						
							
								
									
										
										
										
											2022-03-28 16:25:17 +04:30
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            // FIXME: 3. Optionally, add attributes or children to root to describe the nature of the parsing error.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            // 4. Append root to document.
							 | 
						
					
						
							
								
									
										
										
										
											2022-10-30 17:50:04 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            MUST(document->append_child(*root));
							 | 
						
					
						
							
								
									
										
										
										
											2022-03-28 16:25:17 +04:30
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        }
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-05 05:20:31 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-02-19 20:01:20 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    // 3. Return document.
							 | 
						
					
						
							
								
									
										
										
										
											2023-06-21 13:53:09 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    return *document;
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-05 05:20:31 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								}
							 |