2020-05-24 00:14:23 +02:00
/*
2022-10-17 10:46:11 +02:00
* Copyright ( c ) 2020 - 2022 , Andreas Kling < kling @ serenityos . org >
2020-05-24 00:14:23 +02:00
*
2021-04-22 01:24:48 -07:00
* SPDX - License - Identifier : BSD - 2 - Clause
2020-05-24 00:14:23 +02:00
*/
# pragma once
2023-05-28 15:04:40 +12:00
# include <LibGfx/Color.h>
2022-10-17 10:46:11 +02:00
# include <LibJS/Heap/Cell.h>
2020-05-24 00:14:23 +02:00
# include <LibWeb/DOM/Node.h>
2020-07-28 19:18:23 +02:00
# include <LibWeb/HTML/Parser/HTMLTokenizer.h>
# include <LibWeb/HTML/Parser/ListOfActiveFormattingElements.h>
# include <LibWeb/HTML/Parser/StackOfOpenElements.h>
2020-05-24 00:14:23 +02:00
2020-07-28 18:20:36 +02:00
namespace Web : : HTML {
2020-05-24 00:14:23 +02:00
// X-macro listing every insertion mode known to the parser.
// Each entry is wrapped in __ENUMERATE_INSERTION_MODE(...), which this macro does NOT define:
// the code expanding ENUMERATE_INSERTION_MODES must define it first (and should #undef it afterwards),
// as the InsertionMode enum in HTMLParser does with `mode,` to turn each entry into an enumerator.
// The order of the entries is the order of the generated enumerators, so do not reorder casually.
// NOTE(review): the names appear to follow the insertion modes of the HTML parsing spec - confirm when adding one.
# define ENUMERATE_INSERTION_MODES \
__ENUMERATE_INSERTION_MODE ( Initial ) \
__ENUMERATE_INSERTION_MODE ( BeforeHTML ) \
__ENUMERATE_INSERTION_MODE ( BeforeHead ) \
__ENUMERATE_INSERTION_MODE ( InHead ) \
__ENUMERATE_INSERTION_MODE ( InHeadNoscript ) \
__ENUMERATE_INSERTION_MODE ( AfterHead ) \
__ENUMERATE_INSERTION_MODE ( InBody ) \
__ENUMERATE_INSERTION_MODE ( Text ) \
__ENUMERATE_INSERTION_MODE ( InTable ) \
__ENUMERATE_INSERTION_MODE ( InTableText ) \
__ENUMERATE_INSERTION_MODE ( InCaption ) \
__ENUMERATE_INSERTION_MODE ( InColumnGroup ) \
__ENUMERATE_INSERTION_MODE ( InTableBody ) \
__ENUMERATE_INSERTION_MODE ( InRow ) \
__ENUMERATE_INSERTION_MODE ( InCell ) \
__ENUMERATE_INSERTION_MODE ( InSelect ) \
__ENUMERATE_INSERTION_MODE ( InSelectInTable ) \
__ENUMERATE_INSERTION_MODE ( InTemplate ) \
__ENUMERATE_INSERTION_MODE ( AfterBody ) \
__ENUMERATE_INSERTION_MODE ( InFrameset ) \
__ENUMERATE_INSERTION_MODE ( AfterFrameset ) \
__ENUMERATE_INSERTION_MODE ( AfterAfterBody ) \
__ENUMERATE_INSERTION_MODE ( AfterAfterFrameset )
2022-10-17 10:46:11 +02:00
// HTML parser object.
// Derives from JS::Cell and is handed out as JS::NonnullGCPtr<HTMLParser> by the static create*() factories;
// both constructors are private. It holds an HTMLTokenizer by value plus the tree-construction state
// declared at the bottom of the class: current/original insertion mode, the stack of open elements,
// the stack of template insertion modes, the list of active formatting elements and assorted flags.
class HTMLParser final : public JS : : Cell {
JS_CELL ( HTMLParser , JS : : Cell ) ;
2023-11-19 19:47:52 +01:00
JS_DECLARE_ALLOCATOR ( HTMLParser ) ;
2022-10-17 10:46:11 +02:00
2022-02-15 18:52:45 +00:00
// HTMLTokenizer is granted access to the parser's private members.
friend class HTMLTokenizer ;
2020-05-24 00:14:23 +02:00
public :
2021-09-25 23:15:48 +02:00
~ HTMLParser ( ) ;
2020-05-24 00:14:23 +02:00
2022-10-17 10:46:11 +02:00
// Factory functions - the only public way to obtain a parser, since the constructors are private.
// create_for_scripting() takes only the document (no input text).
// create_with_uncertain_encoding() takes the raw input bytes without an encoding argument.
// NOTE(review): presumably it determines the encoding itself before constructing - confirm in the .cpp.
// create() takes the input text together with an explicit encoding name.
static JS : : NonnullGCPtr < HTMLParser > create_for_scripting ( DOM : : Document & ) ;
static JS : : NonnullGCPtr < HTMLParser > create_with_uncertain_encoding ( DOM : : Document & , ByteBuffer const & input ) ;
2023-12-16 17:49:34 +03:30
static JS : : NonnullGCPtr < HTMLParser > create ( DOM : : Document & , StringView input , ByteString const & encoding ) ;
2021-05-12 10:47:12 +02:00
2022-02-19 15:58:21 +01:00
// Runs the parser. The second overload additionally receives a URL.
// NOTE(review): presumably the URL is applied to the document before parsing starts - confirm in the .cpp.
void run ( ) ;
2021-09-13 00:33:23 +03:00
void run ( const AK : : URL & ) ;
2020-05-24 00:14:23 +02:00
2023-12-19 12:51:34 +00:00
// Static, so it can be called with only a document; the parser argument is optional (defaults to nullptr).
// NOTE(review): the name suggests the spec's "the end" steps run once parsing finishes - confirm.
static void the_end ( JS : : NonnullGCPtr < DOM : : Document > , JS : : GCPtr < HTMLParser > = nullptr ) ;
2020-07-26 19:37:56 +02:00
// Accessor for a DOM::Document reference. NOTE(review): presumably the document held in m_document - confirm.
DOM : : Document & document ( ) ;
2020-05-24 00:14:23 +02:00
2022-08-28 13:42:07 +02:00
// Static fragment helpers: parse markup relative to a context element into a list of node handles,
// and produce a String from a node.
// NOTE(review): exact algorithm (and whether the node itself or only its children are serialized) is defined in the .cpp.
static Vector < JS : : Handle < DOM : : Node > > parse_html_fragment ( DOM : : Element & context_element , StringView ) ;
2023-11-10 09:46:54 +13:00
static String serialize_html_fragment ( DOM : : Node const & node ) ;
2020-06-25 23:42:08 +02:00
2020-05-24 00:14:23 +02:00
// Enumerators are generated from ENUMERATE_INSERTION_MODES: the helper macro is temporarily defined as
// `mode,` so every listed mode becomes one enumerator, in list order, and is undefined again right after.
enum class InsertionMode {
# define __ENUMERATE_INSERTION_MODE(mode) mode,
ENUMERATE_INSERTION_MODES
# undef __ENUMERATE_INSERTION_MODE
} ;
// Returns the current insertion mode (m_insertion_mode).
InsertionMode insertion_mode ( ) const { return m_insertion_mode ; }
2023-11-04 10:19:21 +01:00
// Static predicate over a tag name and an optional namespace.
// NOTE(review): presumably implements the spec's "special" element category - confirm.
static bool is_special_tag ( FlyString const & tag_name , Optional < FlyString > const & namespace_ ) ;
2020-05-29 22:06:05 +02:00
2022-02-19 15:58:21 +01:00
// Mutable access to the tokenizer owned by this parser.
HTMLTokenizer & tokenizer ( ) { return m_tokenizer ; }
2022-09-20 21:08:14 +02:00
// https://html.spec.whatwg.org/multipage/parsing.html#abort-a-parser
void abort ( ) ;
2022-02-19 15:58:21 +01:00
// Simple state getters: each returns the corresponding member unchanged.
bool aborted ( ) const { return m_aborted ; }
2022-09-20 21:08:14 +02:00
bool stopped ( ) const { return m_stop_parsing ; }
2022-02-19 15:58:21 +01:00
size_t script_nesting_level ( ) const { return m_script_nesting_level ; }
2020-05-24 00:14:23 +02:00
private :
2023-12-16 17:49:34 +03:30
// Private constructors, mirroring the factories: one with input text and encoding, one with a document only.
HTMLParser ( DOM : : Document & , StringView input , ByteString const & encoding ) ;
2022-02-21 21:54:21 +01:00
HTMLParser ( DOM : : Document & ) ;
2022-10-17 10:46:11 +02:00
// Overrides the JS::Cell hook. NOTE(review): presumably reports the GCPtr members below to the visitor - confirm.
virtual void visit_edges ( Cell : : Visitor & ) override ;
2022-04-01 20:58:27 +03:00
// Returns a C string. NOTE(review): presumably the name of the current insertion mode, for diagnostics - confirm.
char const * insertion_mode_name ( ) const ;
2020-05-24 00:14:23 +02:00
2022-04-01 20:58:27 +03:00
// Maps a token to a DOM::QuirksMode. NOTE(review): presumably expects a DOCTYPE token - confirm.
DOM : : QuirksMode which_quirks_mode ( HTMLToken const & ) const ;
2020-07-18 21:17:17 +01:00
2020-05-24 00:14:23 +02:00
// Token handlers: there is exactly one handle_*() per InsertionMode enumerator (23 of each).
// NOTE(review): presumably dispatched on the insertion mode via process_using_the_rules_for() - confirm.
void handle_initial ( HTMLToken & ) ;
void handle_before_html ( HTMLToken & ) ;
void handle_before_head ( HTMLToken & ) ;
void handle_in_head ( HTMLToken & ) ;
void handle_in_head_noscript ( HTMLToken & ) ;
void handle_after_head ( HTMLToken & ) ;
void handle_in_body ( HTMLToken & ) ;
2020-05-24 00:49:22 +02:00
void handle_after_body ( HTMLToken & ) ;
void handle_after_after_body ( HTMLToken & ) ;
2020-05-24 00:14:23 +02:00
void handle_text ( HTMLToken & ) ;
2020-05-25 20:30:34 +02:00
void handle_in_table ( HTMLToken & ) ;
2020-05-28 00:27:46 +02:00
void handle_in_table_body ( HTMLToken & ) ;
void handle_in_row ( HTMLToken & ) ;
void handle_in_cell ( HTMLToken & ) ;
2020-05-30 17:57:41 +02:00
void handle_in_table_text ( HTMLToken & ) ;
2020-05-30 19:58:52 +02:00
void handle_in_select_in_table ( HTMLToken & ) ;
void handle_in_select ( HTMLToken & ) ;
2020-06-13 05:09:54 +01:00
void handle_in_caption ( HTMLToken & ) ;
2020-06-13 06:22:18 +01:00
void handle_in_column_group ( HTMLToken & ) ;
2020-06-21 06:58:03 +02:00
void handle_in_template ( HTMLToken & ) ;
void handle_in_frameset ( HTMLToken & ) ;
void handle_after_frameset ( HTMLToken & ) ;
void handle_after_after_frameset ( HTMLToken & ) ;
2020-05-24 00:14:23 +02:00
2020-05-28 18:55:18 +02:00
// Only raises the m_stop_parsing flag (observable through stopped()); it performs no other work itself.
void stop_parsing ( ) { m_stop_parsing = true ; }
2023-10-01 20:07:44 +13:00
// `exception` defaults to an empty FlyString. NOTE(review): presumably a tag name to leave open - confirm.
void generate_implied_end_tags ( FlyString const & exception = { } ) ;
2020-08-19 22:30:33 +01:00
void generate_all_implied_end_tags_thoroughly ( ) ;
2023-11-04 10:19:21 +01:00
// Builds an element for a token, given an optional namespace and the node it is intended to be inserted under.
JS : : NonnullGCPtr < DOM : : Element > create_element_for ( HTMLToken const & , Optional < FlyString > const & namespace_ , DOM : : Node & intended_parent ) ;
2020-06-21 17:00:55 +02:00
// Result of find_appropriate_place_for_inserting_node(): a parent node plus the sibling to insert before.
// Both are nullable GCPtrs. NOTE(review): a null insert_before_sibling presumably means "append" - confirm.
struct AdjustedInsertionLocation {
2022-08-28 13:42:07 +02:00
JS : : GCPtr < DOM : : Node > parent ;
JS : : GCPtr < DOM : : Node > insert_before_sibling ;
2020-06-21 17:00:55 +02:00
} ;
2022-08-28 13:42:07 +02:00
// override_target is optional and defaults to nullptr.
AdjustedInsertionLocation find_appropriate_place_for_inserting_node ( JS : : GCPtr < DOM : : Element > override_target = nullptr ) ;
2020-06-21 17:00:55 +02:00
2020-07-26 19:37:56 +02:00
// Character-data helpers. find_character_insertion_node() returns a raw, nullable DOM::Text pointer.
// NOTE(review): insert_character() presumably buffers into m_character_insertion_builder until
// flush_character_insertions() is called - confirm in the .cpp.
DOM : : Text * find_character_insertion_node ( ) ;
2020-06-03 21:53:08 +02:00
void flush_character_insertions ( ) ;
2023-11-04 10:19:21 +01:00
// Element insertion helpers; both return the element as a non-null GC pointer.
JS : : NonnullGCPtr < DOM : : Element > insert_foreign_element ( HTMLToken const & , Optional < FlyString > const & namespace_ ) ;
2022-08-28 13:42:07 +02:00
JS : : NonnullGCPtr < DOM : : Element > insert_html_element ( HTMLToken const & ) ;
2020-07-26 19:37:56 +02:00
// Element accessors returning references (never null by type).
// NOTE(review): presumably defined in terms of m_stack_of_open_elements (and m_context_element for the adjusted variant) - confirm.
DOM : : Element & current_node ( ) ;
2020-10-12 01:51:28 +01:00
DOM : : Element & adjusted_current_node ( ) ;
2020-07-26 19:37:56 +02:00
DOM : : Element & node_before_current_node ( ) ;
2020-05-24 19:51:50 +02:00
// `data` is a single u32 value. NOTE(review): presumably one Unicode code point - confirm.
void insert_character ( u32 data ) ;
2020-05-24 20:29:01 +02:00
void insert_comment ( HTMLToken & ) ;
2020-05-24 19:51:50 +02:00
void reconstruct_the_active_formatting_elements ( ) ;
2020-05-24 22:21:25 +02:00
void close_a_p_element ( ) ;
2020-05-24 19:51:50 +02:00
// Processes a token using the rules of an explicitly given insertion mode (not necessarily m_insertion_mode).
void process_using_the_rules_for ( InsertionMode , HTMLToken & ) ;
2020-10-12 01:51:28 +01:00
void process_using_the_rules_for_foreign_content ( HTMLToken & ) ;
2020-05-24 20:36:43 +02:00
void parse_generic_raw_text_element ( HTMLToken & ) ;
2020-05-24 22:00:46 +02:00
// NOTE(review): presumably adjust m_script_nesting_level by one each - confirm.
void increment_script_nesting_level ( ) ;
void decrement_script_nesting_level ( ) ;
2020-07-21 19:03:05 +01:00
void reset_the_insertion_mode_appropriately ( ) ;
2020-05-30 16:22:25 +02:00
2020-06-21 06:58:03 +02:00
// Token fix-ups for foreign (MathML/SVG) content; each takes the token by mutable reference.
// Note that adjust_foreign_attributes() is static while the other three are instance methods.
void adjust_mathml_attributes ( HTMLToken & ) ;
2020-10-12 01:51:28 +01:00
void adjust_svg_tag_names ( HTMLToken & ) ;
2020-06-21 06:58:03 +02:00
void adjust_svg_attributes ( HTMLToken & ) ;
2023-11-04 11:26:44 +13:00
static void adjust_foreign_attributes ( HTMLToken & ) ;
2020-06-21 06:58:03 +02:00
2020-05-30 16:22:25 +02:00
// Return value of run_the_adoption_agency_algorithm(): tells the caller either that nothing more is
// required, or that it should go on to run the "any other end tag" steps.
// This is an unscoped enum, so its enumerators are also visible directly in class scope.
enum AdoptionAgencyAlgorithmOutcome {
DoNothing ,
RunAnyOtherEndTagSteps ,
} ;
AdoptionAgencyAlgorithmOutcome run_the_adoption_agency_algorithm ( HTMLToken & ) ;
2020-05-28 00:27:46 +02:00
// Table-related stack helpers.
void clear_the_stack_back_to_a_table_context ( ) ;
void clear_the_stack_back_to_a_table_body_context ( ) ;
void clear_the_stack_back_to_a_table_row_context ( ) ;
2020-05-28 11:45:40 +02:00
void close_the_cell ( ) ;
2020-05-24 00:14:23 +02:00
// Both insertion-mode fields start out as InsertionMode::Initial.
InsertionMode m_insertion_mode { InsertionMode : : Initial } ;
2020-05-24 20:24:43 +02:00
InsertionMode m_original_insertion_mode { InsertionMode : : Initial } ;
2020-05-24 19:51:50 +02:00
2020-05-24 19:24:36 +02:00
StackOfOpenElements m_stack_of_open_elements ;
2020-06-21 06:58:03 +02:00
Vector < InsertionMode > m_stack_of_template_insertion_modes ;
2020-05-27 23:22:42 +02:00
ListOfActiveFormattingElements m_list_of_active_formatting_elements ;
2020-05-24 19:51:50 +02:00
2020-05-24 00:14:23 +02:00
// Tokenizer held by value; exposed through tokenizer().
HTMLTokenizer m_tokenizer ;
// Parser flags. Defaults: foster parenting off, frameset-ok on, not parsing a fragment.
bool m_foster_parenting { false } ;
2020-05-24 00:49:22 +02:00
bool m_frameset_ok { true } ;
bool m_parsing_fragment { false } ;
2022-09-23 20:43:17 +01:00
// https://html.spec.whatwg.org/multipage/parsing.html#scripting-flag
// The scripting flag is set to "enabled" if scripting was enabled for the Document with which the parser is associated when the parser was created, and "disabled" otherwise.
2020-05-24 20:36:43 +02:00
bool m_scripting_enabled { true } ;
2022-09-23 20:43:17 +01:00
2020-05-24 22:00:46 +02:00
bool m_invoked_via_document_write { false } ;
2020-05-27 23:01:04 +02:00
// Reported by aborted().
bool m_aborted { false } ;
2020-05-24 22:00:46 +02:00
bool m_parser_pause_flag { false } ;
2020-05-28 18:55:18 +02:00
// Set by stop_parsing(), reported by stopped().
bool m_stop_parsing { false } ;
2020-05-24 22:00:46 +02:00
// Reported by script_nesting_level(); starts at 0.
size_t m_script_nesting_level { 0 } ;
2020-05-24 00:14:23 +02:00
2022-08-28 13:42:07 +02:00
// Private accessor declared amid the data members. NOTE(review): presumably the realm of m_document - confirm.
JS : : Realm & realm ( ) ;
2022-10-17 10:46:11 +02:00
// Nullable GC pointers to DOM objects the parser refers to.
// NOTE(review): as GCPtr members of a JS::Cell these presumably must be visited in visit_edges() - confirm.
JS : : GCPtr < DOM : : Document > m_document ;
JS : : GCPtr < HTMLHeadElement > m_head_element ;
JS : : GCPtr < HTMLFormElement > m_form_element ;
JS : : GCPtr < DOM : : Element > m_context_element ;
2020-05-30 17:57:41 +02:00
// Tokens stored by value. NOTE(review): presumably collected by handle_in_table_text() - confirm.
Vector < HTMLToken > m_pending_table_character_tokens ;
2020-06-03 21:53:08 +02:00
2022-10-17 10:46:11 +02:00
// Pending character-insertion state: a target text node and a builder for the text.
JS : : GCPtr < DOM : : Text > m_character_insertion_node ;
2020-06-03 21:53:08 +02:00
StringBuilder m_character_insertion_builder ;
2020-05-24 00:14:23 +02:00
} ;
2022-03-26 14:29:52 +01:00
// Free-standing value parsers in Web::HTML; each takes the text to parse as a StringView.
// The two dimension parsers return a RefPtr<CSS::StyleValue>, the colour parser an Optional<Color>.
// NOTE(review): presumably a null RefPtr / empty Optional signals a parse failure, and these follow the
// HTML spec's rules for dimension values, non-zero dimension values and legacy colour values - confirm in the .cpp.
RefPtr < CSS : : StyleValue > parse_dimension_value ( StringView ) ;
RefPtr < CSS : : StyleValue > parse_nonzero_dimension_value ( StringView ) ;
2024-01-16 19:04:45 +01:00
Optional < Color > parse_legacy_color_value ( StringView ) ;
2022-03-26 14:29:52 +01:00
2020-05-24 00:14:23 +02:00
}