2020-05-24 00:14:23 +02:00
/*
* Copyright ( c ) 2020 , Andreas Kling < kling @ serenityos . org >
* All rights reserved .
*
* Redistribution and use in source and binary forms , with or without
* modification , are permitted provided that the following conditions are met :
*
* 1. Redistributions of source code must retain the above copyright notice , this
* list of conditions and the following disclaimer .
*
* 2. Redistributions in binary form must reproduce the above copyright notice ,
* this list of conditions and the following disclaimer in the documentation
* and / or other materials provided with the distribution .
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS " AS IS "
* AND ANY EXPRESS OR IMPLIED WARRANTIES , INCLUDING , BUT NOT LIMITED TO , THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED . IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT , INDIRECT , INCIDENTAL , SPECIAL , EXEMPLARY , OR CONSEQUENTIAL
* DAMAGES ( INCLUDING , BUT NOT LIMITED TO , PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES ; LOSS OF USE , DATA , OR PROFITS ; OR BUSINESS INTERRUPTION ) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY , WHETHER IN CONTRACT , STRICT LIABILITY ,
* OR TORT ( INCLUDING NEGLIGENCE OR OTHERWISE ) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE , EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE .
*/
2020-05-28 18:46:39 +02:00
# define PARSER_DEBUG
2020-05-27 23:32:50 +02:00
2020-05-24 19:51:50 +02:00
# include <AK/Utf32View.h>
2020-05-24 20:29:01 +02:00
# include <LibWeb/DOM/Comment.h>
2020-05-24 00:14:23 +02:00
# include <LibWeb/DOM/Document.h>
# include <LibWeb/DOM/DocumentType.h>
# include <LibWeb/DOM/ElementFactory.h>
2020-05-27 23:32:50 +02:00
# include <LibWeb/DOM/Event.h>
2020-05-24 00:14:23 +02:00
# include <LibWeb/DOM/HTMLFormElement.h>
# include <LibWeb/DOM/HTMLHeadElement.h>
2020-05-24 22:00:46 +02:00
# include <LibWeb/DOM/HTMLScriptElement.h>
2020-05-24 19:51:50 +02:00
# include <LibWeb/DOM/Text.h>
2020-05-24 00:14:23 +02:00
# include <LibWeb/Parser/HTMLDocumentParser.h>
# include <LibWeb/Parser/HTMLToken.h>
2020-05-29 21:44:36 +02:00
// Writes a parse-error notice to the debug stream, tagged with the signature of
// the enclosing function and the line on which the macro is expanded.
// The do/while(0) wrapper makes the expansion behave as one statement.
#define PARSE_ERROR()                                                           \
    do {                                                                        \
        dbg() << " Parse error! " << __PRETTY_FUNCTION__ << " @ " << __LINE__; \
    } while (0)
2020-05-24 00:14:23 +02:00
namespace Web {
2020-05-28 12:35:19 +02:00
// Constructs a parser; `input` and `encoding` are handed straight to the
// tokenizer member, nothing else is set up here.
HTMLDocumentParser::HTMLDocumentParser(const StringView& input, const String& encoding)
    : m_tokenizer(input, encoding)
{
}
// Destructor with an intentionally empty body.
HTMLDocumentParser::~HTMLDocumentParser()
{
}
2020-05-24 22:00:46 +02:00
void HTMLDocumentParser : : run ( const URL & url )
2020-05-24 00:14:23 +02:00
{
m_document = adopt ( * new Document ) ;
2020-05-24 22:00:46 +02:00
m_document - > set_url ( url ) ;
2020-05-28 12:35:19 +02:00
m_document - > set_source ( m_tokenizer . source ( ) ) ;
2020-05-24 00:14:23 +02:00
for ( ; ; ) {
auto optional_token = m_tokenizer . next_token ( ) ;
if ( ! optional_token . has_value ( ) )
2020-05-27 23:32:50 +02:00
break ;
2020-05-24 00:14:23 +02:00
auto & token = optional_token . value ( ) ;
2020-05-27 23:32:50 +02:00
# ifdef PARSER_DEBUG
2020-05-24 00:14:23 +02:00
dbg ( ) < < " [ " < < insertion_mode_name ( ) < < " ] " < < token . to_string ( ) ;
2020-05-27 23:32:50 +02:00
# endif
2020-05-24 19:51:50 +02:00
process_using_the_rules_for ( m_insertion_mode , token ) ;
2020-05-28 18:55:18 +02:00
if ( m_stop_parsing ) {
dbg ( ) < < " Stop parsing! :^) " ;
break ;
}
2020-05-24 19:51:50 +02:00
}
2020-05-27 23:32:50 +02:00
// "The end"
m_document - > dispatch_event ( Event : : create ( " DOMContentLoaded " ) ) ;
2020-05-24 19:51:50 +02:00
}
2020-05-24 00:14:23 +02:00
2020-05-24 19:51:50 +02:00
void HTMLDocumentParser : : process_using_the_rules_for ( InsertionMode mode , HTMLToken & token )
{
switch ( mode ) {
case InsertionMode : : Initial :
handle_initial ( token ) ;
break ;
case InsertionMode : : BeforeHTML :
handle_before_html ( token ) ;
break ;
case InsertionMode : : BeforeHead :
handle_before_head ( token ) ;
break ;
case InsertionMode : : InHead :
handle_in_head ( token ) ;
break ;
case InsertionMode : : InHeadNoscript :
handle_in_head_noscript ( token ) ;
break ;
case InsertionMode : : AfterHead :
handle_after_head ( token ) ;
break ;
case InsertionMode : : InBody :
handle_in_body ( token ) ;
break ;
case InsertionMode : : AfterBody :
handle_after_body ( token ) ;
break ;
case InsertionMode : : AfterAfterBody :
handle_after_after_body ( token ) ;
break ;
case InsertionMode : : Text :
handle_text ( token ) ;
break ;
2020-05-25 20:30:34 +02:00
case InsertionMode : : InTable :
handle_in_table ( token ) ;
break ;
2020-05-28 00:27:46 +02:00
case InsertionMode : : InTableBody :
handle_in_table_body ( token ) ;
break ;
case InsertionMode : : InRow :
handle_in_row ( token ) ;
break ;
case InsertionMode : : InCell :
handle_in_cell ( token ) ;
break ;
2020-05-24 19:51:50 +02:00
default :
ASSERT_NOT_REACHED ( ) ;
2020-05-24 00:14:23 +02:00
}
}
void HTMLDocumentParser : : handle_initial ( HTMLToken & token )
{
2020-05-27 01:49:40 +02:00
if ( token . is_character ( ) & & token . is_parser_whitespace ( ) ) {
return ;
}
if ( token . is_comment ( ) ) {
auto comment = adopt ( * new Comment ( document ( ) , token . m_comment_or_character . data . to_string ( ) ) ) ;
document ( ) . append_child ( move ( comment ) ) ;
return ;
}
if ( token . is_doctype ( ) ) {
2020-05-24 00:14:23 +02:00
auto doctype = adopt ( * new DocumentType ( document ( ) ) ) ;
doctype - > set_name ( token . m_doctype . name . to_string ( ) ) ;
document ( ) . append_child ( move ( doctype ) ) ;
m_insertion_mode = InsertionMode : : BeforeHTML ;
return ;
}
2020-05-28 00:21:31 +02:00
PARSE_ERROR ( ) ;
document ( ) . set_quirks_mode ( true ) ;
m_insertion_mode = InsertionMode : : BeforeHTML ;
process_using_the_rules_for ( InsertionMode : : BeforeHTML , token ) ;
2020-05-24 00:14:23 +02:00
}
void HTMLDocumentParser : : handle_before_html ( HTMLToken & token )
{
2020-05-27 01:49:40 +02:00
if ( token . is_doctype ( ) ) {
PARSE_ERROR ( ) ;
return ;
}
if ( token . is_comment ( ) ) {
auto comment = adopt ( * new Comment ( document ( ) , token . m_comment_or_character . data . to_string ( ) ) ) ;
document ( ) . append_child ( move ( comment ) ) ;
return ;
}
2020-05-24 19:51:50 +02:00
if ( token . is_character ( ) & & token . is_parser_whitespace ( ) ) {
return ;
}
2020-05-24 00:14:23 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " html " ) {
auto element = create_element_for ( token ) ;
document ( ) . append_child ( element ) ;
2020-05-24 19:24:36 +02:00
m_stack_of_open_elements . push ( move ( element ) ) ;
2020-05-24 00:14:23 +02:00
m_insertion_mode = InsertionMode : : BeforeHead ;
return ;
}
2020-05-27 01:49:40 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( " head " , " body " , " html " , " br " ) ) {
goto AnythingElse ;
}
if ( token . is_end_tag ( ) ) {
PARSE_ERROR ( ) ;
return ;
}
AnythingElse :
auto element = create_element ( document ( ) , " html " ) ;
m_stack_of_open_elements . push ( element ) ;
// FIXME: If the Document is being loaded as part of navigation of a browsing context, then: run the application cache selection algorithm with no manifest, passing it the Document object.
m_insertion_mode = InsertionMode : : BeforeHead ;
process_using_the_rules_for ( InsertionMode : : BeforeHead , token ) ;
return ;
2020-05-24 00:14:23 +02:00
}
2020-05-24 19:24:36 +02:00
// Returns whatever the stack of open elements reports as its current node.
Element& HTMLDocumentParser::current_node()
{
    auto& node = m_stack_of_open_elements.current_node();
    return node;
}
// Picks the node under which newly created nodes are inserted.
// Currently this is always the current node; the foster-parenting case is
// not implemented and hits TODO().
RefPtr<Node> HTMLDocumentParser::find_appropriate_place_for_inserting_node()
{
    auto& insertion_target = current_node();
    if (m_foster_parenting)
        TODO();
    return insertion_target;
}
// Creates an element named after the token's tag name and copies every
// attribute carried by the token onto it. The element is not inserted
// anywhere; that is left to the caller.
NonnullRefPtr<Element> HTMLDocumentParser::create_element_for(HTMLToken& token)
{
    auto new_element = create_element(document(), token.tag_name());
    for (auto& attr : token.m_tag.attributes)
        new_element->set_attribute(attr.name_builder.to_string(), attr.value_builder.to_string());
    return new_element;
}
// Creates an element for `token`, appends it at the current insertion
// location and pushes it on the stack of open elements.
// Returns the newly inserted element.
RefPtr<Element> HTMLDocumentParser::insert_html_element(HTMLToken& token)
{
    auto parent = find_appropriate_place_for_inserting_node();
    auto new_element = create_element_for(token);

    // FIXME: Check if it's possible to insert `element` at `adjusted_insertion_location`
    parent->append_child(new_element);

    m_stack_of_open_elements.push(new_element);
    return new_element;
}
void HTMLDocumentParser : : handle_before_head ( HTMLToken & token )
{
2020-05-24 19:51:50 +02:00
if ( token . is_character ( ) & & token . is_parser_whitespace ( ) ) {
return ;
}
2020-05-27 01:49:40 +02:00
if ( token . is_comment ( ) ) {
insert_comment ( token ) ;
return ;
}
if ( token . is_doctype ( ) ) {
PARSE_ERROR ( ) ;
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " html " ) {
process_using_the_rules_for ( InsertionMode : : InBody , token ) ;
return ;
}
2020-05-24 00:14:23 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " head " ) {
auto element = insert_html_element ( token ) ;
m_head_element = to < HTMLHeadElement > ( element ) ;
m_insertion_mode = InsertionMode : : InHead ;
return ;
}
2020-05-27 01:49:40 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( " head " , " body " , " html " , " br " ) ) {
goto AnythingElse ;
}
if ( token . is_end_tag ( ) ) {
PARSE_ERROR ( ) ;
return ;
}
AnythingElse :
HTMLToken fake_head_token ;
fake_head_token . m_type = HTMLToken : : Type : : StartTag ;
fake_head_token . m_tag . tag_name . append ( " head " ) ;
m_head_element = to < HTMLHeadElement > ( insert_html_element ( fake_head_token ) ) ;
m_insertion_mode = InsertionMode : : InHead ;
process_using_the_rules_for ( InsertionMode : : InHead , token ) ;
return ;
2020-05-24 00:14:23 +02:00
}
2020-05-24 20:29:01 +02:00
void HTMLDocumentParser : : insert_comment ( HTMLToken & token )
{
auto data = token . m_comment_or_character . data . to_string ( ) ;
auto adjusted_insertion_location = find_appropriate_place_for_inserting_node ( ) ;
adjusted_insertion_location - > append_child ( adopt ( * new Comment ( document ( ) , data ) ) ) ;
}
2020-05-24 00:14:23 +02:00
void HTMLDocumentParser : : handle_in_head ( HTMLToken & token )
{
2020-05-24 20:24:43 +02:00
if ( token . is_parser_whitespace ( ) ) {
insert_character ( token . codepoint ( ) ) ;
return ;
}
2020-05-24 20:29:01 +02:00
if ( token . is_comment ( ) ) {
insert_comment ( token ) ;
return ;
}
2020-05-25 20:16:48 +02:00
if ( token . is_doctype ( ) ) {
PARSE_ERROR ( ) ;
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " html " ) {
process_using_the_rules_for ( InsertionMode : : InBody , token ) ;
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( " base " , " basefont " , " bgsound " , " link " ) ) {
insert_html_element ( token ) ;
m_stack_of_open_elements . pop ( ) ;
token . acknowledge_self_closing_flag_if_set ( ) ;
return ;
}
2020-05-24 20:24:43 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " title " ) {
insert_html_element ( token ) ;
m_tokenizer . switch_to ( { } , HTMLTokenizer : : State : : RCDATA ) ;
m_original_insertion_mode = m_insertion_mode ;
m_insertion_mode = InsertionMode : : Text ;
return ;
}
2020-05-24 20:36:43 +02:00
if ( token . is_start_tag ( ) & & ( ( token . tag_name ( ) = = " noscript " & & m_scripting_enabled ) | | token . tag_name ( ) = = " noframes " | | token . tag_name ( ) = = " style " ) ) {
parse_generic_raw_text_element ( token ) ;
return ;
}
2020-05-24 22:00:46 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " script " ) {
auto adjusted_insertion_location = find_appropriate_place_for_inserting_node ( ) ;
auto element = create_element_for ( token ) ;
auto & script_element = to < HTMLScriptElement > ( * element ) ;
script_element . set_parser_document ( { } , document ( ) ) ;
script_element . set_non_blocking ( { } , false ) ;
if ( m_parsing_fragment ) {
TODO ( ) ;
}
if ( m_invoked_via_document_write ) {
TODO ( ) ;
}
adjusted_insertion_location - > append_child ( element , false ) ;
m_stack_of_open_elements . push ( element ) ;
m_tokenizer . switch_to ( { } , HTMLTokenizer : : State : : ScriptData ) ;
m_original_insertion_mode = m_insertion_mode ;
m_insertion_mode = InsertionMode : : Text ;
return ;
}
2020-05-24 00:14:23 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " meta " ) {
auto element = insert_html_element ( token ) ;
2020-05-24 19:24:36 +02:00
m_stack_of_open_elements . pop ( ) ;
2020-05-25 20:16:48 +02:00
token . acknowledge_self_closing_flag_if_set ( ) ;
2020-05-24 00:14:23 +02:00
return ;
}
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = " head " ) {
2020-05-24 19:24:36 +02:00
m_stack_of_open_elements . pop ( ) ;
2020-05-24 00:14:23 +02:00
m_insertion_mode = InsertionMode : : AfterHead ;
return ;
}
2020-05-30 10:35:25 +02:00
TODO ( ) ;
2020-05-24 00:14:23 +02:00
}
void HTMLDocumentParser : : handle_in_head_noscript ( HTMLToken & )
{
2020-05-30 10:35:25 +02:00
TODO ( ) ;
2020-05-24 00:14:23 +02:00
}
2020-05-24 20:36:43 +02:00
void HTMLDocumentParser : : parse_generic_raw_text_element ( HTMLToken & token )
{
insert_html_element ( token ) ;
m_tokenizer . switch_to ( { } , HTMLTokenizer : : State : : RAWTEXT ) ;
m_original_insertion_mode = m_insertion_mode ;
m_insertion_mode = InsertionMode : : Text ;
}
2020-05-24 19:51:50 +02:00
void HTMLDocumentParser : : insert_character ( u32 data )
{
auto adjusted_insertion_location = find_appropriate_place_for_inserting_node ( ) ;
if ( adjusted_insertion_location - > is_document ( ) )
return ;
if ( adjusted_insertion_location - > last_child ( ) & & adjusted_insertion_location - > last_child ( ) - > is_text ( ) ) {
auto & existing_text_node = to < Text > ( * adjusted_insertion_location - > last_child ( ) ) ;
StringBuilder builder ;
builder . append ( existing_text_node . data ( ) ) ;
builder . append ( Utf32View { & data , 1 } ) ;
existing_text_node . set_data ( builder . to_string ( ) ) ;
return ;
}
2020-05-28 00:23:34 +02:00
auto new_text_node = adopt ( * new Text ( document ( ) , " " ) ) ;
adjusted_insertion_location - > append_child ( new_text_node ) ;
2020-05-24 19:51:50 +02:00
StringBuilder builder ;
builder . append ( Utf32View { & data , 1 } ) ;
2020-05-28 00:23:34 +02:00
new_text_node - > set_data ( builder . to_string ( ) ) ;
2020-05-24 19:51:50 +02:00
}
2020-05-24 00:14:23 +02:00
void HTMLDocumentParser : : handle_after_head ( HTMLToken & token )
{
if ( token . is_character ( ) ) {
2020-05-24 19:51:50 +02:00
if ( token . is_parser_whitespace ( ) ) {
insert_character ( token . codepoint ( ) ) ;
return ;
}
2020-05-30 10:35:25 +02:00
TODO ( ) ;
2020-05-24 00:14:23 +02:00
}
if ( token . is_comment ( ) ) {
2020-05-30 10:35:25 +02:00
TODO ( ) ;
2020-05-24 00:14:23 +02:00
}
if ( token . is_doctype ( ) ) {
2020-05-30 10:35:25 +02:00
TODO ( ) ;
2020-05-24 00:14:23 +02:00
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " html " ) {
2020-05-30 10:35:25 +02:00
TODO ( ) ;
2020-05-24 00:14:23 +02:00
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " body " ) {
2020-05-24 00:49:22 +02:00
insert_html_element ( token ) ;
m_frameset_ok = false ;
m_insertion_mode = InsertionMode : : InBody ;
return ;
2020-05-24 00:14:23 +02:00
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " frameset " ) {
2020-05-30 10:35:25 +02:00
TODO ( ) ;
2020-05-24 00:14:23 +02:00
}
2020-05-25 12:57:20 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( " base " , " basefont " , " bgsound " , " link " , " meta " , " noframes " , " script " , " style " , " template " , " title " ) ) {
2020-05-30 10:35:25 +02:00
TODO ( ) ;
2020-05-24 00:14:23 +02:00
}
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = " template " ) {
2020-05-30 10:35:25 +02:00
TODO ( ) ;
2020-05-24 00:14:23 +02:00
}
2020-05-25 12:57:20 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( " body " , " html " , " br " ) ) {
2020-05-24 00:14:23 +02:00
goto AnythingElse ;
}
if ( ( token . is_start_tag ( ) & & token . tag_name ( ) = = " head " ) | | token . is_end_tag ( ) ) {
2020-05-30 10:35:25 +02:00
TODO ( ) ;
2020-05-24 00:14:23 +02:00
}
AnythingElse :
HTMLToken fake_body_token ;
fake_body_token . m_type = HTMLToken : : Type : : StartTag ;
fake_body_token . m_tag . tag_name . append ( " body " ) ;
insert_html_element ( fake_body_token ) ;
m_insertion_mode = InsertionMode : : InBody ;
2020-05-24 00:49:22 +02:00
// FIXME: Reprocess the current token in InBody!
2020-05-24 00:14:23 +02:00
}
2020-05-24 22:21:25 +02:00
void HTMLDocumentParser : : generate_implied_end_tags ( const FlyString & exception )
2020-05-24 00:14:23 +02:00
{
2020-05-25 12:57:20 +02:00
while ( current_node ( ) . tag_name ( ) ! = exception & & current_node ( ) . tag_name ( ) . is_one_of ( " dd " , " dt " , " li " , " optgroup " , " option " , " p " , " rb " , " rp " , " rt " , " rtc " ) )
2020-05-24 19:24:36 +02:00
m_stack_of_open_elements . pop ( ) ;
2020-05-24 00:49:22 +02:00
}
2020-05-24 22:21:25 +02:00
void HTMLDocumentParser : : close_a_p_element ( )
{
generate_implied_end_tags ( " p " ) ;
if ( current_node ( ) . tag_name ( ) ! = " p " ) {
2020-05-25 20:02:27 +02:00
PARSE_ERROR ( ) ;
2020-05-24 22:21:25 +02:00
}
2020-05-28 18:09:31 +02:00
m_stack_of_open_elements . pop_until_an_element_with_tag_name_has_been_popped ( " p " ) ;
2020-05-24 22:21:25 +02:00
}
2020-05-24 00:49:22 +02:00
void HTMLDocumentParser : : handle_after_body ( HTMLToken & token )
{
2020-05-24 19:51:50 +02:00
if ( token . is_character ( ) & & token . is_parser_whitespace ( ) ) {
process_using_the_rules_for ( InsertionMode : : InBody , token ) ;
return ;
}
2020-05-28 18:52:32 +02:00
if ( token . is_comment ( ) ) {
TODO ( ) ;
}
if ( token . is_doctype ( ) ) {
PARSE_ERROR ( ) ;
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " html " ) {
process_using_the_rules_for ( InsertionMode : : InBody , token ) ;
return ;
}
if ( token . is_end_of_file ( ) ) {
2020-05-28 18:55:18 +02:00
stop_parsing ( ) ;
return ;
2020-05-28 18:52:32 +02:00
}
2020-05-24 00:49:22 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = " html " ) {
if ( m_parsing_fragment ) {
2020-05-30 10:35:25 +02:00
TODO ( ) ;
2020-05-24 00:49:22 +02:00
}
m_insertion_mode = InsertionMode : : AfterAfterBody ;
return ;
}
2020-05-28 18:52:32 +02:00
PARSE_ERROR ( ) ;
m_insertion_mode = InsertionMode : : InBody ;
process_using_the_rules_for ( InsertionMode : : InBody , token ) ;
2020-05-24 00:49:22 +02:00
}
void HTMLDocumentParser : : handle_after_after_body ( HTMLToken & token )
{
2020-05-24 19:51:50 +02:00
if ( token . is_doctype ( ) | | token . is_parser_whitespace ( ) | | ( token . is_start_tag ( ) & & token . tag_name ( ) = = " html " ) ) {
process_using_the_rules_for ( InsertionMode : : InBody , token ) ;
return ;
}
2020-05-24 00:49:22 +02:00
if ( token . is_end_of_file ( ) ) {
2020-05-28 18:55:18 +02:00
stop_parsing ( ) ;
2020-05-24 00:49:22 +02:00
return ;
}
2020-05-30 10:35:25 +02:00
TODO ( ) ;
2020-05-24 00:49:22 +02:00
}
2020-05-24 19:51:50 +02:00
void HTMLDocumentParser : : reconstruct_the_active_formatting_elements ( )
{
2020-05-24 22:39:59 +02:00
// FIXME: This needs to care about "markers"
2020-05-24 19:51:50 +02:00
if ( m_list_of_active_formatting_elements . is_empty ( ) )
return ;
2020-05-28 00:27:46 +02:00
if ( m_list_of_active_formatting_elements . entries ( ) . last ( ) . is_marker ( ) )
return ;
2020-05-27 23:22:42 +02:00
if ( m_stack_of_open_elements . contains ( * m_list_of_active_formatting_elements . entries ( ) . last ( ) . element ) )
2020-05-24 22:39:59 +02:00
return ;
2020-05-27 23:22:42 +02:00
ssize_t index = m_list_of_active_formatting_elements . entries ( ) . size ( ) - 1 ;
RefPtr < Element > entry = m_list_of_active_formatting_elements . entries ( ) . at ( index ) . element ;
ASSERT ( entry ) ;
2020-05-24 22:39:59 +02:00
Rewind :
2020-05-26 21:39:28 +02:00
if ( index = = 0 ) {
2020-05-24 22:39:59 +02:00
goto Create ;
}
- - index ;
2020-05-27 23:22:42 +02:00
entry = m_list_of_active_formatting_elements . entries ( ) . at ( index ) . element ;
ASSERT ( entry ) ;
2020-05-24 22:39:59 +02:00
if ( ! m_stack_of_open_elements . contains ( * entry ) )
goto Rewind ;
Advance :
+ + index ;
2020-05-27 23:22:42 +02:00
entry = m_list_of_active_formatting_elements . entries ( ) . at ( index ) . element ;
ASSERT ( entry ) ;
2020-05-24 22:39:59 +02:00
Create :
// FIXME: Hold on to the real token!
HTMLToken fake_token ;
fake_token . m_type = HTMLToken : : Type : : StartTag ;
fake_token . m_tag . tag_name . append ( entry - > tag_name ( ) ) ;
auto new_element = insert_html_element ( fake_token ) ;
2020-05-27 23:22:42 +02:00
m_list_of_active_formatting_elements . entries ( ) . at ( index ) . element = * new_element ;
2020-05-24 22:39:59 +02:00
2020-05-27 23:22:42 +02:00
if ( index ! = ( ssize_t ) m_list_of_active_formatting_elements . entries ( ) . size ( ) - 1 )
2020-05-24 22:39:59 +02:00
goto Advance ;
2020-05-24 19:51:50 +02:00
}
2020-05-27 23:22:42 +02:00
void HTMLDocumentParser : : run_the_adoption_agency_algorithm ( HTMLToken & token )
{
auto subject = token . tag_name ( ) ;
// If the current node is an HTML element whose tag name is subject,
// and the current node is not in the list of active formatting elements,
// then pop the current node off the stack of open elements, and return.
if ( current_node ( ) . tag_name ( ) = = subject & & ! m_list_of_active_formatting_elements . contains ( current_node ( ) ) ) {
m_stack_of_open_elements . pop ( ) ;
return ;
}
size_t outer_loop_counter = 0 ;
2020-05-27 23:32:50 +02:00
//OuterLoop:
2020-05-27 23:22:42 +02:00
if ( outer_loop_counter > = 8 )
return ;
+ + outer_loop_counter ;
auto formatting_element = m_list_of_active_formatting_elements . last_element_with_tag_name_before_marker ( subject ) ;
if ( ! formatting_element ) {
// FIXME: If there is no such element, then return and instead act as
// described in the "any other end tag" entry above.
TODO ( ) ;
}
if ( ! m_stack_of_open_elements . contains ( * formatting_element ) ) {
PARSE_ERROR ( ) ;
// FIXME: If formatting element is not in the stack of open elements,
// then this is a parse error; remove the element from the list, and return.
TODO ( ) ;
}
if ( ! m_stack_of_open_elements . has_in_scope ( * formatting_element ) ) {
PARSE_ERROR ( ) ;
return ;
}
if ( formatting_element ! = & current_node ( ) ) {
PARSE_ERROR ( ) ;
}
2020-05-29 22:06:05 +02:00
RefPtr < Element > furthest_block = m_stack_of_open_elements . topmost_special_node_below ( * formatting_element ) ;
2020-05-27 23:22:42 +02:00
if ( ! furthest_block ) {
while ( & current_node ( ) ! = formatting_element )
m_stack_of_open_elements . pop ( ) ;
m_stack_of_open_elements . pop ( ) ;
m_list_of_active_formatting_elements . remove ( * formatting_element ) ;
return ;
}
// FIXME: Implement the rest of the AAA :^)
TODO ( ) ;
}
2020-05-29 22:06:05 +02:00
bool HTMLDocumentParser : : is_special_tag ( const FlyString & tag_name )
2020-05-29 21:44:36 +02:00
{
return tag_name . is_one_of (
" address " ,
" applet " ,
" area " ,
" article " ,
" aside " ,
" base " ,
" basefont " ,
" bgsound " ,
" blockquote " ,
" body " ,
" br " ,
" button " ,
" caption " ,
" center " ,
" col " ,
" colgroup " ,
" dd " ,
" details " ,
" dir " ,
" div " ,
" dl " ,
" dt " ,
" embed " ,
" fieldset " ,
" figcaption " ,
" figure " ,
" footer " ,
" form " ,
" frame " ,
" frameset " ,
" h1 " ,
" h2 " ,
" h3 " ,
" h4 " ,
" h5 " ,
" h6 " ,
" head " ,
" header " ,
" hgroup " ,
" hr " ,
" html " ,
" iframe " ,
" img " ,
" input " ,
" keygen " ,
" li " ,
" link " ,
" listing " ,
" main " ,
" marquee " ,
" menu " ,
" meta " ,
" nav " ,
" noembed " ,
" noframes " ,
" noscript " ,
" object " ,
" ol " ,
" p " ,
" param " ,
" plaintext " ,
" pre " ,
" script " ,
" section " ,
" select " ,
" source " ,
" style " ,
" summary " ,
" table " ,
" tbody " ,
" td " ,
" template " ,
" textarea " ,
" tfoot " ,
" th " ,
" thead " ,
" title " ,
" tr " ,
" track " ,
" ul " ,
" wbr " ,
" xmp " ) ;
}
2020-05-24 00:49:22 +02:00
void HTMLDocumentParser : : handle_in_body ( HTMLToken & token )
{
2020-05-24 19:51:50 +02:00
if ( token . is_character ( ) ) {
if ( token . codepoint ( ) = = 0 ) {
2020-05-30 10:35:25 +02:00
TODO ( ) ;
2020-05-24 19:51:50 +02:00
}
if ( token . is_parser_whitespace ( ) ) {
reconstruct_the_active_formatting_elements ( ) ;
insert_character ( token . codepoint ( ) ) ;
return ;
}
2020-05-24 22:21:25 +02:00
reconstruct_the_active_formatting_elements ( ) ;
insert_character ( token . codepoint ( ) ) ;
m_frameset_ok = false ;
return ;
2020-05-24 19:51:50 +02:00
}
2020-05-28 18:46:39 +02:00
if ( token . is_comment ( ) ) {
insert_comment ( token ) ;
return ;
}
2020-05-29 21:20:49 +02:00
if ( token . is_doctype ( ) ) {
PARSE_ERROR ( ) ;
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " html " ) {
TODO ( ) ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( " base " , " basefont " , " bgsound " , " link " , " meta " , " noframes " , " script " , " style " , " template " , " title " ) ) {
process_using_the_rules_for ( InsertionMode : : InHead , token ) ;
return ;
}
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = " template " ) {
process_using_the_rules_for ( InsertionMode : : InHead , token ) ;
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " body " ) {
TODO ( ) ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " frameset " ) {
TODO ( ) ;
}
if ( token . is_end_of_file ( ) ) {
TODO ( ) ;
}
2020-05-24 00:49:22 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = " body " ) {
2020-05-24 19:24:36 +02:00
if ( ! m_stack_of_open_elements . has_in_scope ( " body " ) ) {
2020-05-30 10:35:25 +02:00
TODO ( ) ;
2020-05-24 00:49:22 +02:00
}
// FIXME: Otherwise, if there is a node in the stack of open elements that is
// not either a dd element, a dt element, an li element, an optgroup element,
// an option element, a p element, an rb element, an rp element, an rt element,
// an rtc element, a tbody element, a td element, a tfoot element, a th element,
// a thead element, a tr element, the body element, or the html element,
// then this is a parse error.
m_insertion_mode = InsertionMode : : AfterBody ;
return ;
}
2020-05-29 21:20:49 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " html " ) {
TODO ( ) ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( " address " , " article " , " aside " , " blockquote " , " center " , " details " , " dialog " , " dir " , " div " , " dl " , " fieldset " , " figcaption " , " figure " , " footer " , " header " , " hgroup " , " main " , " menu " , " nav " , " ol " , " p " , " section " , " summary " , " ul " ) ) {
if ( m_stack_of_open_elements . has_in_button_scope ( " p " ) )
close_a_p_element ( ) ;
insert_html_element ( token ) ;
return ;
}
2020-05-25 12:57:20 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( " h1 " , " h2 " , " h3 " , " h4 " , " h5 " , " h6 " ) ) {
if ( m_stack_of_open_elements . has_in_button_scope ( " p " ) )
close_a_p_element ( ) ;
if ( current_node ( ) . tag_name ( ) . is_one_of ( " h1 " , " h2 " , " h3 " , " h4 " , " h5 " , " h6 " ) ) {
2020-05-27 18:18:39 +02:00
PARSE_ERROR ( ) ;
m_stack_of_open_elements . pop ( ) ;
2020-05-24 22:21:25 +02:00
}
2020-05-25 12:57:20 +02:00
insert_html_element ( token ) ;
return ;
2020-05-24 22:21:25 +02:00
}
2020-05-29 21:20:49 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( " pre " , " listing " ) ) {
TODO ( ) ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " form " ) {
2020-05-30 11:13:57 +02:00
if ( m_form_element & & m_stack_of_open_elements . contains ( " template " ) ) {
PARSE_ERROR ( ) ;
return ;
}
if ( m_stack_of_open_elements . has_in_button_scope ( " p " ) )
close_a_p_element ( ) ;
auto element = insert_html_element ( token ) ;
if ( ! m_stack_of_open_elements . contains ( " template " ) )
m_form_element = to < HTMLFormElement > ( * element ) ;
return ;
2020-05-29 21:20:49 +02:00
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " li " ) {
2020-05-29 21:44:36 +02:00
m_frameset_ok = false ;
for ( ssize_t i = m_stack_of_open_elements . elements ( ) . size ( ) - 1 ; i > = 0 ; - - i ) {
RefPtr < Element > node = m_stack_of_open_elements . elements ( ) [ i ] ;
if ( node - > tag_name ( ) = = " li " ) {
generate_implied_end_tags ( " li " ) ;
if ( current_node ( ) . tag_name ( ) ! = " li " ) {
PARSE_ERROR ( ) ;
}
m_stack_of_open_elements . pop_until_an_element_with_tag_name_has_been_popped ( " li " ) ;
break ;
}
if ( is_special_tag ( node - > tag_name ( ) ) & & ! node - > tag_name ( ) . is_one_of ( " address " , " div " , " p " ) )
break ;
}
if ( m_stack_of_open_elements . has_in_button_scope ( " p " ) )
close_a_p_element ( ) ;
insert_html_element ( token ) ;
return ;
2020-05-29 21:20:49 +02:00
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( " dd " , " dt " ) ) {
TODO ( ) ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " plaintext " ) {
if ( m_stack_of_open_elements . has_in_button_scope ( " p " ) )
close_a_p_element ( ) ;
insert_html_element ( token ) ;
m_tokenizer . switch_to ( { } , HTMLTokenizer : : State : : PLAINTEXT ) ;
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " button " ) {
if ( m_stack_of_open_elements . has_in_button_scope ( " button " ) ) {
PARSE_ERROR ( ) ;
generate_implied_end_tags ( ) ;
m_stack_of_open_elements . pop_until_an_element_with_tag_name_has_been_popped ( " button " ) ;
}
reconstruct_the_active_formatting_elements ( ) ;
insert_html_element ( token ) ;
m_frameset_ok = false ;
return ;
}
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( " address " , " article " , " aside " , " blockquote " , " button " , " center " , " details " , " dialog " , " dir " , " div " , " dl " , " fieldset " , " figcaption " , " figure " , " footer " , " header " , " hgroup " , " listing " , " main " , " menu " , " nav " , " ol " , " pre " , " section " , " summary " , " ul " ) ) {
if ( ! m_stack_of_open_elements . has_in_scope ( token . tag_name ( ) ) ) {
PARSE_ERROR ( ) ;
return ;
}
generate_implied_end_tags ( ) ;
if ( current_node ( ) . tag_name ( ) ! = token . tag_name ( ) ) {
PARSE_ERROR ( ) ;
}
m_stack_of_open_elements . pop_until_an_element_with_tag_name_has_been_popped ( token . tag_name ( ) ) ;
return ;
}
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = " form " ) {
2020-05-30 11:13:57 +02:00
if ( ! m_stack_of_open_elements . contains ( " template " ) ) {
auto node = m_form_element ;
m_form_element = nullptr ;
if ( ! node | | m_stack_of_open_elements . has_in_scope ( * node ) ) {
PARSE_ERROR ( ) ;
return ;
}
generate_implied_end_tags ( ) ;
if ( & current_node ( ) ! = node ) {
PARSE_ERROR ( ) ;
}
m_stack_of_open_elements . elements ( ) . remove_first_matching ( [ & ] ( auto & entry ) { return entry . ptr ( ) = = node . ptr ( ) ; } ) ;
} else {
TODO ( ) ;
}
return ;
2020-05-29 21:20:49 +02:00
}
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = " p " ) {
if ( ! m_stack_of_open_elements . has_in_button_scope ( " p " ) ) {
PARSE_ERROR ( ) ;
HTMLToken fake_p_token ;
fake_p_token . m_type = HTMLToken : : Type : : StartTag ;
fake_p_token . m_tag . tag_name . append ( " p " ) ;
insert_html_element ( fake_p_token ) ;
}
close_a_p_element ( ) ;
return ;
}
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = " li " ) {
2020-05-29 22:06:05 +02:00
if ( ! m_stack_of_open_elements . has_in_list_item_scope ( " li " ) ) {
PARSE_ERROR ( ) ;
return ;
}
generate_implied_end_tags ( " li " ) ;
if ( current_node ( ) . tag_name ( ) ! = " li " ) {
PARSE_ERROR ( ) ;
dbg ( ) < < " Expected <li> current node, but had < " < < current_node ( ) . tag_name ( ) < < " > " ;
}
m_stack_of_open_elements . pop_until_an_element_with_tag_name_has_been_popped ( " li " ) ;
return ;
2020-05-29 21:20:49 +02:00
}
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( " dd " , " dt " ) ) {
TODO ( ) ;
}
2020-05-25 12:57:20 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( " h1 " , " h2 " , " h3 " , " h4 " , " h5 " , " h6 " ) ) {
if ( ! m_stack_of_open_elements . has_in_scope ( " h1 " )
& & ! m_stack_of_open_elements . has_in_scope ( " h2 " )
& & ! m_stack_of_open_elements . has_in_scope ( " h3 " )
& & ! m_stack_of_open_elements . has_in_scope ( " h4 " )
& & ! m_stack_of_open_elements . has_in_scope ( " h5 " )
& & ! m_stack_of_open_elements . has_in_scope ( " h6 " ) ) {
2020-05-27 18:18:39 +02:00
PARSE_ERROR ( ) ;
return ;
2020-05-25 12:57:20 +02:00
}
2020-05-24 22:21:25 +02:00
2020-05-25 12:57:20 +02:00
generate_implied_end_tags ( ) ;
if ( current_node ( ) . tag_name ( ) ! = token . tag_name ( ) ) {
2020-05-27 18:18:39 +02:00
PARSE_ERROR ( ) ;
2020-05-25 12:57:20 +02:00
}
2020-05-24 22:21:25 +02:00
2020-05-25 12:57:20 +02:00
for ( ; ; ) {
auto popped_element = m_stack_of_open_elements . pop ( ) ;
2020-05-27 18:18:39 +02:00
if ( popped_element - > tag_name ( ) . is_one_of ( " h1 " , " h2 " , " h3 " , " h4 " , " h5 " , " h6 " ) )
2020-05-25 12:57:20 +02:00
break ;
2020-05-24 22:21:25 +02:00
}
2020-05-25 12:57:20 +02:00
return ;
2020-05-24 22:21:25 +02:00
}
2020-05-29 21:20:49 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " a " ) {
2020-05-30 11:03:05 +02:00
if ( auto * element = m_list_of_active_formatting_elements . last_element_with_tag_name_before_marker ( " a " ) ) {
PARSE_ERROR ( ) ;
run_the_adoption_agency_algorithm ( token ) ;
m_list_of_active_formatting_elements . remove ( * element ) ;
m_stack_of_open_elements . elements ( ) . remove_first_matching ( [ & ] ( auto & entry ) {
return entry . ptr ( ) = = element ;
} ) ;
2020-05-29 22:06:05 +02:00
}
reconstruct_the_active_formatting_elements ( ) ;
auto element = insert_html_element ( token ) ;
m_list_of_active_formatting_elements . add ( * element ) ;
return ;
2020-05-24 22:21:25 +02:00
}
2020-05-27 23:22:42 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( " b " , " big " , " code " , " em " , " font " , " i " , " s " , " small " , " strike " , " strong " , " tt " , " u " ) ) {
reconstruct_the_active_formatting_elements ( ) ;
auto element = insert_html_element ( token ) ;
m_list_of_active_formatting_elements . add ( * element ) ;
return ;
}
2020-05-29 21:20:49 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " nobr " ) {
TODO ( ) ;
}
2020-05-27 23:22:42 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( " a " , " b " , " big " , " code " , " em " , " font " , " i " , " nobr " , " s " , " small " , " strike " , " strong " , " tt " , " u " ) ) {
run_the_adoption_agency_algorithm ( token ) ;
return ;
2020-05-24 22:21:25 +02:00
}
2020-05-29 21:20:49 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( " applet " , " marquee " , " object " ) ) {
reconstruct_the_active_formatting_elements ( ) ;
2020-05-25 12:57:20 +02:00
insert_html_element ( token ) ;
2020-05-29 21:20:49 +02:00
m_list_of_active_formatting_elements . add_marker ( ) ;
m_frameset_ok = false ;
2020-05-25 12:57:20 +02:00
return ;
}
2020-05-24 00:49:22 +02:00
2020-05-29 21:20:49 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( " applet " , " marquee " , " object " ) ) {
TODO ( ) ;
2020-05-24 00:49:22 +02:00
}
2020-05-25 20:30:34 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " table " ) {
2020-05-29 21:20:49 +02:00
if ( ! document ( ) . in_quirks_mode ( ) ) {
if ( m_stack_of_open_elements . has_in_button_scope ( " p " ) )
close_a_p_element ( ) ;
}
2020-05-25 20:30:34 +02:00
insert_html_element ( token ) ;
m_frameset_ok = false ;
m_insertion_mode = InsertionMode : : InTable ;
return ;
}
2020-05-29 21:20:49 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = " br " ) {
TODO ( ) ;
}
2020-05-28 00:25:30 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( " area " , " br " , " embed " , " img " , " keygen " , " wbr " ) ) {
reconstruct_the_active_formatting_elements ( ) ;
insert_html_element ( token ) ;
m_stack_of_open_elements . pop ( ) ;
token . acknowledge_self_closing_flag_if_set ( ) ;
m_frameset_ok = false ;
return ;
}
2020-05-28 12:18:46 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " input " ) {
reconstruct_the_active_formatting_elements ( ) ;
insert_html_element ( token ) ;
m_stack_of_open_elements . pop ( ) ;
token . acknowledge_self_closing_flag_if_set ( ) ;
auto type_attribute = token . attribute ( HTML : : AttributeNames : : type ) ;
if ( type_attribute . is_null ( ) | | type_attribute ! = " hidden " ) {
m_frameset_ok = false ;
}
return ;
}
2020-05-29 21:20:49 +02:00
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( " param " , " source " , " track " ) ) {
TODO ( ) ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " hr " ) {
if ( m_stack_of_open_elements . has_in_button_scope ( " p " ) )
close_a_p_element ( ) ;
insert_html_element ( token ) ;
m_stack_of_open_elements . pop ( ) ;
token . acknowledge_self_closing_flag_if_set ( ) ;
m_frameset_ok = false ;
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " image " ) {
// Parse error. Change the token's tag name to "img" and reprocess it. (Don't ask.)
PARSE_ERROR ( ) ;
token . m_tag . tag_name . clear ( ) ;
token . m_tag . tag_name . append ( " img " ) ;
process_using_the_rules_for ( m_insertion_mode , token ) ;
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " textarea " ) {
TODO ( ) ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " xmp " ) {
TODO ( ) ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " iframe " ) {
m_frameset_ok = false ;
parse_generic_raw_text_element ( token ) ;
return ;
}
if ( token . is_start_tag ( ) & & ( ( token . tag_name ( ) = = " noembed " ) | | ( token . tag_name ( ) = = " noscript " & & m_scripting_enabled ) ) ) {
parse_generic_raw_text_element ( token ) ;
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " select " ) {
TODO ( ) ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( " optgroup " , " option " ) ) {
if ( current_node ( ) . tag_name ( ) = = " option " )
m_stack_of_open_elements . pop ( ) ;
reconstruct_the_active_formatting_elements ( ) ;
insert_html_element ( token ) ;
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( " rb " , " rtc " ) ) {
TODO ( ) ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( " rp " , " rt " ) ) {
TODO ( ) ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " math " ) {
TODO ( ) ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " svg " ) {
TODO ( ) ;
}
if ( ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( " caption " , " col " , " colgroup " , " frame " , " head " , " tbody " , " td " , " tfoot " , " th " , " thead " , " tr " ) ) ) {
PARSE_ERROR ( ) ;
return ;
}
// Any other start tag
2020-05-24 22:21:25 +02:00
if ( token . is_start_tag ( ) ) {
reconstruct_the_active_formatting_elements ( ) ;
insert_html_element ( token ) ;
return ;
}
2020-05-29 21:20:49 +02:00
// Any other end tag
2020-05-24 22:39:59 +02:00
if ( token . is_end_tag ( ) ) {
RefPtr < Element > node ;
for ( ssize_t i = m_stack_of_open_elements . elements ( ) . size ( ) - 1 ; i > = 0 ; - - i ) {
node = m_stack_of_open_elements . elements ( ) [ i ] ;
if ( node - > tag_name ( ) = = token . tag_name ( ) ) {
generate_implied_end_tags ( token . tag_name ( ) ) ;
if ( node ! = current_node ( ) ) {
2020-05-27 01:49:40 +02:00
PARSE_ERROR ( ) ;
2020-05-24 22:39:59 +02:00
}
while ( & current_node ( ) ! = node ) {
m_stack_of_open_elements . pop ( ) ;
}
m_stack_of_open_elements . pop ( ) ;
break ;
}
// FIXME: Handle special elements!
}
return ;
}
2020-05-30 10:35:25 +02:00
TODO ( ) ;
2020-05-24 00:14:23 +02:00
}
2020-05-24 22:00:46 +02:00
void HTMLDocumentParser : : increment_script_nesting_level ( )
{
+ + m_script_nesting_level ;
}
void HTMLDocumentParser : : decrement_script_nesting_level ( )
{
ASSERT ( m_script_nesting_level ) ;
- - m_script_nesting_level ;
}
2020-05-24 20:24:43 +02:00
void HTMLDocumentParser : : handle_text ( HTMLToken & token )
2020-05-24 00:14:23 +02:00
{
2020-05-24 20:24:43 +02:00
if ( token . is_character ( ) ) {
insert_character ( token . codepoint ( ) ) ;
return ;
}
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = " script " ) {
2020-05-24 22:00:46 +02:00
NonnullRefPtr < HTMLScriptElement > script = to < HTMLScriptElement > ( current_node ( ) ) ;
m_stack_of_open_elements . pop ( ) ;
m_insertion_mode = m_original_insertion_mode ;
// FIXME: Handle tokenizer insertion point stuff here.
increment_script_nesting_level ( ) ;
script - > prepare_script ( { } ) ;
decrement_script_nesting_level ( ) ;
if ( script_nesting_level ( ) = = 0 )
m_parser_pause_flag = false ;
// FIXME: Handle tokenizer insertion point stuff here too.
2020-05-27 23:01:04 +02:00
while ( document ( ) . pending_parsing_blocking_script ( ) ) {
if ( script_nesting_level ( ) ! = 0 ) {
m_parser_pause_flag = true ;
// FIXME: Abort the processing of any nested invocations of the tokenizer,
// yielding control back to the caller. (Tokenization will resume when
// the caller returns to the "outer" tree construction stage.)
TODO ( ) ;
} else {
auto the_script = document ( ) . take_pending_parsing_blocking_script ( { } ) ;
m_tokenizer . set_blocked ( true ) ;
// FIXME: If the parser's Document has a style sheet that is blocking scripts
// or the script's "ready to be parser-executed" flag is not set:
// spin the event loop until the parser's Document has no style sheet
// that is blocking scripts and the script's "ready to be parser-executed"
// flag is set.
ASSERT ( the_script - > is_ready_to_be_parser_executed ( ) ) ;
if ( m_aborted )
return ;
m_tokenizer . set_blocked ( false ) ;
// FIXME: Handle tokenizer insertion point stuff here too.
ASSERT ( script_nesting_level ( ) = = 0 ) ;
increment_script_nesting_level ( ) ;
the_script - > execute_script ( ) ;
decrement_script_nesting_level ( ) ;
ASSERT ( script_nesting_level ( ) = = 0 ) ;
m_parser_pause_flag = false ;
// FIXME: Handle tokenizer insertion point stuff here too.
}
}
2020-05-24 22:00:46 +02:00
return ;
2020-05-24 20:24:43 +02:00
}
2020-05-27 23:01:04 +02:00
2020-05-28 00:23:34 +02:00
// FIXME: This is a bit hackish, we can simplify this once we don't need to support
// the old parser anymore, since then we don't need to maintain its children_changed() semantics.
2020-05-27 23:22:42 +02:00
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = " style " ) {
current_node ( ) . children_changed ( ) ;
// NOTE: We don't return here, keep going.
}
2020-05-24 20:24:43 +02:00
if ( token . is_end_tag ( ) ) {
m_stack_of_open_elements . pop ( ) ;
m_insertion_mode = m_original_insertion_mode ;
return ;
}
2020-05-30 10:35:25 +02:00
TODO ( ) ;
2020-05-24 00:14:23 +02:00
}
2020-05-28 00:27:46 +02:00
void HTMLDocumentParser : : clear_the_stack_back_to_a_table_context ( )
{
while ( ! current_node ( ) . tag_name ( ) . is_one_of ( " table " , " template " , " html " ) )
m_stack_of_open_elements . pop ( ) ;
}
void HTMLDocumentParser : : clear_the_stack_back_to_a_table_row_context ( )
{
while ( ! current_node ( ) . tag_name ( ) . is_one_of ( " tr " , " template " , " html " ) )
m_stack_of_open_elements . pop ( ) ;
}
void HTMLDocumentParser : : clear_the_stack_back_to_a_table_body_context ( )
{
while ( ! current_node ( ) . tag_name ( ) . is_one_of ( " tbody " , " tfoot " , " thead " , " template " , " html " ) )
m_stack_of_open_elements . pop ( ) ;
}
void HTMLDocumentParser : : handle_in_row ( HTMLToken & token )
{
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( " th " , " td " ) ) {
clear_the_stack_back_to_a_table_row_context ( ) ;
insert_html_element ( token ) ;
m_insertion_mode = InsertionMode : : InCell ;
m_list_of_active_formatting_elements . add_marker ( ) ;
return ;
}
if ( token . is_end_tag ( ) & & token . tag_name ( ) = = " tr " ) {
if ( ! m_stack_of_open_elements . has_in_table_scope ( " tr " ) ) {
PARSE_ERROR ( ) ;
return ;
}
clear_the_stack_back_to_a_table_row_context ( ) ;
m_stack_of_open_elements . pop ( ) ;
m_insertion_mode = InsertionMode : : InTableBody ;
return ;
}
TODO ( ) ;
}
2020-05-28 11:45:40 +02:00
void HTMLDocumentParser : : close_the_cell ( )
{
generate_implied_end_tags ( ) ;
if ( ! current_node ( ) . tag_name ( ) . is_one_of ( " td " , " th " ) ) {
PARSE_ERROR ( ) ;
}
while ( ! current_node ( ) . tag_name ( ) . is_one_of ( " td " , " th " ) )
m_stack_of_open_elements . pop ( ) ;
m_stack_of_open_elements . pop ( ) ;
m_list_of_active_formatting_elements . clear_up_to_the_last_marker ( ) ;
m_insertion_mode = InsertionMode : : InRow ;
}
2020-05-28 00:27:46 +02:00
void HTMLDocumentParser : : handle_in_cell ( HTMLToken & token )
{
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( " td " , " th " ) ) {
if ( ! m_stack_of_open_elements . has_in_table_scope ( token . tag_name ( ) ) ) {
PARSE_ERROR ( ) ;
return ;
}
generate_implied_end_tags ( ) ;
if ( current_node ( ) . tag_name ( ) ! = token . tag_name ( ) ) {
PARSE_ERROR ( ) ;
}
2020-05-28 18:20:55 +02:00
m_stack_of_open_elements . pop_until_an_element_with_tag_name_has_been_popped ( token . tag_name ( ) ) ;
2020-05-28 00:27:46 +02:00
m_list_of_active_formatting_elements . clear_up_to_the_last_marker ( ) ;
m_insertion_mode = InsertionMode : : InRow ;
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( " caption " , " col " , " colgroup " , " tbody " , " td " , " tfoot " , " th " , " thead " , " tr " ) ) {
2020-05-28 11:45:40 +02:00
if ( ! m_stack_of_open_elements . has_in_table_scope ( " td " ) & & m_stack_of_open_elements . has_in_table_scope ( " th " ) ) {
PARSE_ERROR ( ) ;
return ;
}
close_the_cell ( ) ;
process_using_the_rules_for ( m_insertion_mode , token ) ;
return ;
2020-05-28 00:27:46 +02:00
}
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( " body " , " caption " , " col " , " colgroup " , " html " ) ) {
PARSE_ERROR ( ) ;
return ;
}
if ( token . is_end_tag ( ) & & token . tag_name ( ) . is_one_of ( " table " , " tbody " , " tfoot " , " thead " , " tr " ) ) {
TODO ( ) ;
}
process_using_the_rules_for ( InsertionMode : : InBody , token ) ;
}
void HTMLDocumentParser : : handle_in_table_body ( HTMLToken & token )
{
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " tr " ) {
clear_the_stack_back_to_a_table_body_context ( ) ;
insert_html_element ( token ) ;
m_insertion_mode = InsertionMode : : InRow ;
return ;
}
if ( ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( " caption " , " col " , " colgroup " , " tbody " , " tfoot " , " thead " ) )
| | ( token . is_end_tag ( ) & & token . tag_name ( ) = = " table " ) ) {
// FIXME: If the stack of open elements does not have a tbody, thead, or tfoot element in table scope, this is a parse error; ignore the token.
clear_the_stack_back_to_a_table_body_context ( ) ;
m_stack_of_open_elements . pop ( ) ;
m_insertion_mode = InsertionMode : : InTable ;
process_using_the_rules_for ( InsertionMode : : InTable , token ) ;
return ;
}
TODO ( ) ;
}
2020-05-25 20:30:34 +02:00
void HTMLDocumentParser : : handle_in_table ( HTMLToken & token )
{
if ( token . is_character ( ) & & current_node ( ) . tag_name ( ) . is_one_of ( " table " , " tbody " , " tfoot " , " thead " , " tr " ) ) {
TODO ( ) ;
}
if ( token . is_comment ( ) ) {
insert_comment ( token ) ;
return ;
}
if ( token . is_doctype ( ) ) {
PARSE_ERROR ( ) ;
return ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " caption " ) {
TODO ( ) ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " colgroup " ) {
TODO ( ) ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " col " ) {
TODO ( ) ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( " tbody " , " tfoot " , " thead " ) ) {
TODO ( ) ;
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) . is_one_of ( " td " , " th " , " tr " ) ) {
2020-05-28 00:27:46 +02:00
clear_the_stack_back_to_a_table_context ( ) ;
HTMLToken fake_tbody_token ;
fake_tbody_token . m_type = HTMLToken : : Type : : StartTag ;
fake_tbody_token . m_tag . tag_name . append ( " tbody " ) ;
insert_html_element ( fake_tbody_token ) ;
m_insertion_mode = InsertionMode : : InTableBody ;
process_using_the_rules_for ( InsertionMode : : InTableBody , token ) ;
return ;
2020-05-25 20:30:34 +02:00
}
if ( token . is_start_tag ( ) & & token . tag_name ( ) = = " table " ) {
PARSE_ERROR ( ) ;
TODO ( ) ;
}
if ( token . is_end_tag ( ) ) {
if ( ! m_stack_of_open_elements . has_in_table_scope ( " table " ) ) {
PARSE_ERROR ( ) ;
return ;
}
2020-05-28 18:09:31 +02:00
m_stack_of_open_elements . pop_until_an_element_with_tag_name_has_been_popped ( " table " ) ;
2020-05-25 20:30:34 +02:00
reset_the_insertion_mode_appropriately ( ) ;
return ;
}
TODO ( ) ;
}
void HTMLDocumentParser : : reset_the_insertion_mode_appropriately ( )
{
2020-05-28 00:26:33 +02:00
for ( ssize_t i = m_stack_of_open_elements . elements ( ) . size ( ) - 1 ; i > = 0 ; - - i ) {
RefPtr < Element > node = m_stack_of_open_elements . elements ( ) . at ( i ) ;
if ( node - > tag_name ( ) = = " select " ) {
TODO ( ) ;
}
if ( node - > tag_name ( ) . is_one_of ( " td " , " th " ) ) {
m_insertion_mode = InsertionMode : : InCell ;
return ;
}
if ( node - > tag_name ( ) = = " tr " ) {
m_insertion_mode = InsertionMode : : InRow ;
return ;
}
if ( node - > tag_name ( ) . is_one_of ( " tbody " , " thead " , " tfoot " ) ) {
m_insertion_mode = InsertionMode : : InTableBody ;
return ;
}
if ( node - > tag_name ( ) = = " caption " ) {
m_insertion_mode = InsertionMode : : InCaption ;
return ;
}
if ( node - > tag_name ( ) = = " colgroup " ) {
m_insertion_mode = InsertionMode : : InColumnGroup ;
return ;
}
if ( node - > tag_name ( ) = = " table " ) {
m_insertion_mode = InsertionMode : : InTable ;
return ;
}
if ( node - > tag_name ( ) = = " template " ) {
TODO ( ) ;
}
if ( node - > tag_name ( ) = = " body " ) {
m_insertion_mode = InsertionMode : : InBody ;
return ;
}
if ( node - > tag_name ( ) = = " frameset " ) {
m_insertion_mode = InsertionMode : : InFrameset ;
if ( m_parsing_fragment ) {
TODO ( ) ;
}
return ;
}
if ( node - > tag_name ( ) = = " html " ) {
TODO ( ) ;
}
}
m_insertion_mode = InsertionMode : : InBody ;
if ( m_parsing_fragment ) {
TODO ( ) ;
}
2020-05-25 20:30:34 +02:00
}
2020-05-24 00:14:23 +02:00
// Returns the name of the current insertion mode (m_insertion_mode) as a C string.
// The string is the enumerator's identifier, produced by preprocessor stringification.
const char * HTMLDocumentParser : : insertion_mode_name ( ) const
{
switch ( m_insertion_mode ) {
// Each expansion of __ENUMERATE_INSERTION_MODE emits one `case` that returns the
// stringified enumerator name. ENUMERATE_INSERTION_MODES is defined elsewhere;
// presumably it invokes this macro once per InsertionMode value (confirm in the header).
# define __ENUMERATE_INSERTION_MODE(mode) \
case InsertionMode : : mode : \
return # mode ;
ENUMERATE_INSERTION_MODES
# undef __ENUMERATE_INSERTION_MODE
}
// Reached only when m_insertion_mode matched none of the generated cases.
ASSERT_NOT_REACHED ( ) ;
}
// Returns a reference to the object m_document points at.
Document& HTMLDocumentParser::document()
{
    auto& target_document = *m_document;
    return target_document;
}
}