2021-05-20 23:15:33 +04:30
/*
 * Copyright (c) 2021, Ali Mohammad Pur <mpfard@serenityos.org>
 * Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
2021-06-04 11:25:09 +02:00
# include <AK/Debug.h>
2021-06-07 12:03:09 +04:30
# include <LibJS/SyntaxHighlighter.h>
2021-10-21 21:35:11 +01:00
# include <LibWeb/CSS/SyntaxHighlighter/SyntaxHighlighter.h>
2021-05-20 23:15:33 +04:30
# include <LibWeb/HTML/Parser/HTMLTokenizer.h>
# include <LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.h>
namespace Web : : HTML {
// Kinds of HTML tokens this highlighter tags its spans with; each span stores
// the kind converted to u64.
// __Count is not a real kind: it sits one past the last enumerator, and
// rehighlight() adds it to the token-kind offset handed to nested highlighters.
enum class AugmentedTokenKind : u32 {
    AttributeName,
    AttributeValue,
    OpenTag,
    CloseTag,
    Comment,
    Doctype,
    __Count,
};
2021-06-07 12:03:09 +04:30
bool SyntaxHighlighter : : is_identifier ( u64 token ) const
2021-05-20 23:15:33 +04:30
{
if ( ! token )
return false ;
return false ;
}
2021-06-07 12:03:09 +04:30
bool SyntaxHighlighter : : is_navigatable ( u64 ) const
2021-05-20 23:15:33 +04:30
{
return false ;
}
2021-06-04 11:25:09 +02:00
void SyntaxHighlighter : : rehighlight ( Palette const & palette )
2021-05-20 23:15:33 +04:30
{
2021-06-04 11:25:09 +02:00
dbgln_if ( SYNTAX_HIGHLIGHTING_DEBUG , " (HTML::SyntaxHighlighter) starting rehighlight " ) ;
2021-05-20 23:15:33 +04:30
auto text = m_client - > get_text ( ) ;
2021-06-07 12:03:09 +04:30
clear_nested_token_pairs ( ) ;
2021-05-20 23:15:33 +04:30
2023-03-03 14:40:58 +00:00
// FIXME: Add folding regions for start and end tags.
Vector < GUI : : TextDocumentFoldingRegion > folding_regions ;
2021-05-20 23:15:33 +04:30
Vector < GUI : : TextDocumentSpan > spans ;
auto highlight = [ & ] ( auto start_line , auto start_column , auto end_line , auto end_column , Gfx : : TextAttributes attributes , AugmentedTokenKind kind ) {
2021-06-04 11:38:57 +02:00
if ( start_line > end_line | | ( start_line = = end_line & & start_column > = end_column ) ) {
dbgln_if ( SYNTAX_HIGHLIGHTING_DEBUG , " (HTML::SyntaxHighlighter) discarding ({}-{}) to ({}-{}) because it has zero or negative length " , start_line , start_column , end_line , end_column ) ;
return ;
}
2021-06-04 11:25:09 +02:00
dbgln_if ( SYNTAX_HIGHLIGHTING_DEBUG , " (HTML::SyntaxHighlighter) highlighting ({}-{}) to ({}-{}) with color {} " , start_line , start_column , end_line , end_column , attributes . color ) ;
2021-05-20 23:15:33 +04:30
spans . empend (
GUI : : TextRange {
{ start_line , start_column } ,
{ end_line , end_column } ,
} ,
move ( attributes ) ,
2021-06-07 12:03:09 +04:30
static_cast < u64 > ( kind ) ,
2021-05-20 23:15:33 +04:30
false ) ;
} ;
HTMLTokenizer tokenizer { text , " utf-8 " } ;
[[maybe_unused]] enum class State {
HTML ,
Javascript ,
CSS ,
} state { State : : HTML } ;
2021-06-07 12:03:09 +04:30
StringBuilder substring_builder ;
GUI : : TextPosition substring_start_position ;
2021-05-20 23:15:33 +04:30
for ( ; ; ) {
auto token = tokenizer . next_token ( ) ;
2021-06-04 11:38:57 +02:00
if ( ! token . has_value ( ) | | token . value ( ) . is_end_of_file ( ) )
2021-05-20 23:15:33 +04:30
break ;
2022-12-06 01:12:49 +00:00
dbgln_if ( SYNTAX_HIGHLIGHTING_DEBUG , " (HTML::SyntaxHighlighter) got token of type {} " , token - > to_deprecated_string ( ) ) ;
2021-05-20 23:15:33 +04:30
if ( token - > is_start_tag ( ) ) {
if ( token - > tag_name ( ) = = " script " sv ) {
tokenizer . switch_to ( HTMLTokenizer : : State : : ScriptData ) ;
state = State : : Javascript ;
2021-06-07 12:03:09 +04:30
substring_start_position = { token - > end_position ( ) . line , token - > end_position ( ) . column } ;
2021-05-20 23:15:33 +04:30
} else if ( token - > tag_name ( ) = = " style " sv ) {
tokenizer . switch_to ( HTMLTokenizer : : State : : RAWTEXT ) ;
state = State : : CSS ;
2021-06-07 12:03:09 +04:30
substring_start_position = { token - > end_position ( ) . line , token - > end_position ( ) . column } ;
2021-05-20 23:15:33 +04:30
}
} else if ( token - > is_end_tag ( ) ) {
if ( token - > tag_name ( ) . is_one_of ( " script " sv , " style " sv ) ) {
if ( state = = State : : Javascript ) {
2021-06-07 12:03:09 +04:30
Syntax : : ProxyHighlighterClient proxy_client {
* m_client ,
substring_start_position ,
static_cast < u64 > ( AugmentedTokenKind : : __Count ) + first_free_token_kind_serial_value ( ) ,
substring_builder . string_view ( )
} ;
{
JS : : SyntaxHighlighter highlighter ;
highlighter . attach ( proxy_client ) ;
highlighter . rehighlight ( palette ) ;
highlighter . detach ( ) ;
register_nested_token_pairs ( proxy_client . corrected_token_pairs ( highlighter . matching_token_pairs ( ) ) ) ;
}
2021-06-12 13:24:45 +02:00
spans . extend ( proxy_client . corrected_spans ( ) ) ;
2023-03-03 14:40:58 +00:00
folding_regions . extend ( proxy_client . corrected_folding_regions ( ) ) ;
2021-06-07 12:03:09 +04:30
substring_builder . clear ( ) ;
2021-05-20 23:15:33 +04:30
} else if ( state = = State : : CSS ) {
2021-10-21 21:35:11 +01:00
Syntax : : ProxyHighlighterClient proxy_client {
* m_client ,
substring_start_position ,
static_cast < u64 > ( AugmentedTokenKind : : __Count ) + first_free_token_kind_serial_value ( ) ,
substring_builder . string_view ( )
} ;
{
CSS : : SyntaxHighlighter highlighter ;
highlighter . attach ( proxy_client ) ;
highlighter . rehighlight ( palette ) ;
highlighter . detach ( ) ;
register_nested_token_pairs ( proxy_client . corrected_token_pairs ( highlighter . matching_token_pairs ( ) ) ) ;
}
spans . extend ( proxy_client . corrected_spans ( ) ) ;
2023-03-03 14:40:58 +00:00
folding_regions . extend ( proxy_client . corrected_folding_regions ( ) ) ;
2021-06-07 12:03:09 +04:30
substring_builder . clear ( ) ;
2021-05-20 23:15:33 +04:30
}
state = State : : HTML ;
}
2021-06-07 12:03:09 +04:30
} else if ( state ! = State : : HTML ) {
VERIFY ( token - > is_character ( ) ) ;
substring_builder . append_code_point ( token - > code_point ( ) ) ;
continue ;
2021-05-20 23:15:33 +04:30
}
size_t token_start_offset = token - > is_end_tag ( ) ? 1 : 0 ;
if ( token - > is_comment ( ) ) {
highlight (
token - > start_position ( ) . line ,
token - > start_position ( ) . column ,
2021-06-04 11:38:57 +02:00
token - > end_position ( ) . line ,
token - > end_position ( ) . column ,
2021-05-20 23:15:33 +04:30
{ palette . syntax_comment ( ) , { } } ,
AugmentedTokenKind : : Comment ) ;
2023-03-03 14:40:58 +00:00
GUI : : TextDocumentFoldingRegion region ;
region . range . set_start ( { token - > start_position ( ) . line , token - > start_position ( ) . column + comment_prefix ( ) - > length ( ) } ) ;
region . range . set_end ( { token - > end_position ( ) . line , token - > end_position ( ) . column - comment_suffix ( ) - > length ( ) } ) ;
folding_regions . append ( move ( region ) ) ;
2021-05-20 23:15:33 +04:30
} else if ( token - > is_start_tag ( ) | | token - > is_end_tag ( ) ) {
highlight (
token - > start_position ( ) . line ,
token - > start_position ( ) . column + token_start_offset ,
token - > start_position ( ) . line ,
2021-06-04 11:38:57 +02:00
token - > start_position ( ) . column + token_start_offset + token - > tag_name ( ) . length ( ) ,
2023-03-15 12:35:00 +00:00
{ palette . syntax_keyword ( ) , { } , true } ,
2021-05-20 23:15:33 +04:30
token - > is_start_tag ( ) ? AugmentedTokenKind : : OpenTag : AugmentedTokenKind : : CloseTag ) ;
2021-07-14 23:53:11 +02:00
token - > for_each_attribute ( [ & ] ( auto & attribute ) {
2021-05-20 23:15:33 +04:30
highlight (
attribute . name_start_position . line ,
attribute . name_start_position . column + token_start_offset ,
attribute . name_end_position . line ,
attribute . name_end_position . column + token_start_offset ,
{ palette . syntax_identifier ( ) , { } } ,
AugmentedTokenKind : : AttributeName ) ;
highlight (
attribute . value_start_position . line ,
attribute . value_start_position . column + token_start_offset ,
attribute . value_end_position . line ,
attribute . value_end_position . column + token_start_offset ,
{ palette . syntax_string ( ) , { } } ,
AugmentedTokenKind : : AttributeValue ) ;
2021-07-14 23:53:11 +02:00
return IterationDecision : : Continue ;
} ) ;
2021-05-20 23:15:33 +04:30
} else if ( token - > is_doctype ( ) ) {
highlight (
token - > start_position ( ) . line ,
token - > start_position ( ) . column ,
token - > start_position ( ) . line ,
token - > start_position ( ) . column ,
{ palette . syntax_preprocessor_statement ( ) , { } } ,
AugmentedTokenKind : : Doctype ) ;
}
}
2021-06-04 11:25:09 +02:00
if constexpr ( SYNTAX_HIGHLIGHTING_DEBUG ) {
dbgln ( " (HTML::SyntaxHighlighter) list of all spans: " ) ;
for ( auto & span : spans )
2021-06-07 12:03:09 +04:30
dbgln ( " {}, {} - {} " , span . range , span . attributes . color , span . data ) ;
2021-06-04 11:25:09 +02:00
dbgln ( " (HTML::SyntaxHighlighter) end of list " ) ;
}
2021-05-20 23:15:33 +04:30
m_client - > do_set_spans ( move ( spans ) ) ;
2023-03-03 14:40:58 +00:00
m_client - > do_set_folding_regions ( move ( folding_regions ) ) ;
2021-05-20 23:15:33 +04:30
m_has_brace_buddies = false ;
highlight_matching_token_pair ( ) ;
m_client - > do_update ( ) ;
}
2021-06-07 12:03:09 +04:30
// Returns the token-kind pairs that match each other in HTML: a single
// (OpenTag, CloseTag) entry. The list lives in a function-local static and is
// filled on the first call only; callers receive a copy.
Vector<Syntax::Highlighter::MatchingTokenPair> SyntaxHighlighter::matching_token_pairs_impl() const
{
    static Vector<MatchingTokenPair> pairs;
    if (!pairs.is_empty())
        return pairs;

    auto const open_tag = static_cast<u64>(AugmentedTokenKind::OpenTag);
    auto const close_tag = static_cast<u64>(AugmentedTokenKind::CloseTag);
    pairs.append({ open_tag, close_tag });
    return pairs;
}
2021-06-07 12:03:09 +04:30
bool SyntaxHighlighter : : token_types_equal ( u64 token0 , u64 token1 ) const
2021-05-20 23:15:33 +04:30
{
return token0 = = token1 ;
}
}