mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2026-04-19 02:10:26 +00:00
Delete Lexer.cpp/h and Token.cpp, replacing all tokenization with a new rust_tokenize() FFI function that calls back for each token. Rewrite SyntaxHighlighter.cpp and js.cpp REPL to use the Rust tokenizer. The token type and category enums in Token.h now mirror the Rust definitions in token.rs. Move is_syntax_character/is_whitespace/is_line_terminator helpers into RegExpConstructor.cpp as static functions, since they were only used there.
423 lines
14 KiB
C++
423 lines
14 KiB
C++
/*
|
|
* Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
|
|
* Copyright (c) 2024, Sam Atkins <sam@ladybird.org>
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#include <AK/StringBuilder.h>
|
|
#include <LibJS/SyntaxHighlighter.h>
|
|
#include <LibJS/Token.h>
|
|
#include <LibURL/URL.h>
|
|
#include <LibWeb/CSS/Parser/Token.h>
|
|
#include <LibWeb/CSS/SyntaxHighlighter/SyntaxHighlighter.h>
|
|
#include <LibWeb/DOMURL/DOMURL.h>
|
|
#include <LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.h>
|
|
#include <LibWebView/SourceHighlighter.h>
|
|
|
|
namespace WebView {
|
|
|
|
// Builds the document from `source`: every line ending is normalized to '\n' and the result is split into
// one Syntax::TextDocumentLine per line (empty lines are kept).
SourceDocument::SourceDocument(String const& source)
{
    // HTML, CSS and JS differ slightly on what they consider a newline to be.
    // In order to make them get along in documents that include a mix of the three, process the source to make the
    // newlines consistent before doing any highlighting.

    // Optimization: If all the newlines are \n, just use the input string.
    if (!source.code_points().contains_any_of(Array<u32, 3> { '\r', 0x2028, 0x2029 })) {
        m_source = source;
    } else {
        StringBuilder builder { source.byte_count() };

        // Convert any '\r\n', \r, <LS> or <PS> to \n
        // A '\r' is not emitted immediately: whether it stands alone or starts a '\r\n' pair is only known once
        // the following code point has been seen.
        bool previous_was_cr = false;
        for (u32 code_point : source.code_points()) {
            if (previous_was_cr && code_point != '\n')
                builder.append('\n');
            previous_was_cr = false;

            switch (code_point) {
            case '\r':
                previous_was_cr = true;
                break;
            case JS::LINE_SEPARATOR:
            case JS::PARAGRAPH_SEPARATOR:
                builder.append('\n');
                break;
            default:
                builder.append_code_point(code_point);
            }
        }

        // A '\r' that is the very last code point has no successor to flush it, so emit its newline here.
        // Without this, a source ending in '\r' would lose its final line break.
        if (previous_was_cr)
            builder.append('\n');

        m_source = builder.to_string_without_validation();
    }

    m_source.code_points().for_each_split_view(
        [](u32 it) { return it == '\n'; },
        SplitBehavior::KeepEmpty,
        [&](auto line) {
            m_lines.append(Syntax::TextDocumentLine { *this, line.as_string() });
        });
}
|
|
|
|
// Returns a mutable reference to the line stored at `line_index`.
Syntax::TextDocumentLine& SourceDocument::line(size_t line_index)
{
    auto& requested_line = m_lines[line_index];
    return requested_line;
}
|
|
|
|
// Returns a read-only reference to the line stored at `line_index`.
Syntax::TextDocumentLine const& SourceDocument::line(size_t line_index) const
{
    auto const& requested_line = m_lines[line_index];
    return requested_line;
}
|
|
|
|
// Wraps `source` in a SourceDocument and, when `language` is CSS, HTML or JavaScript, attaches the matching
// syntax highlighter and runs it once. For any other language no highlighter is created.
SourceHighlighterClient::SourceHighlighterClient(String const& source, Syntax::Language language)
    : m_document(SourceDocument::create(source))
{
    // HACK: Syntax highlighters require a palette, but we don't actually care about the output styling, only the type of token for each span.
    //       Also, getting a palette from the UI is nontrivial. So, create a dummy blank one and use that.
    auto theme_buffer = MUST(Core::AnonymousBuffer::create_with_size(sizeof(Gfx::SystemTheme)));
    auto blank_palette_impl = Gfx::PaletteImpl::create_with_anonymous_buffer(theme_buffer);
    Gfx::Palette blank_palette { blank_palette_impl };

    if (language == Syntax::Language::CSS)
        m_highlighter = make<Web::CSS::SyntaxHighlighter>();
    else if (language == Syntax::Language::HTML)
        m_highlighter = make<Web::HTML::SyntaxHighlighter>();
    else if (language == Syntax::Language::JavaScript)
        m_highlighter = make<JS::SyntaxHighlighter>();

    // Nothing to highlight with for unsupported languages.
    if (!m_highlighter)
        return;

    m_highlighter->attach(*this);
    m_highlighter->rehighlight(blank_palette);
}
|
|
|
|
// Returns the spans currently stored on the underlying document.
Vector<Syntax::TextDocumentSpan> const& SourceHighlighterClient::spans() const
{
    auto const& document_spans = document().spans();
    return document_spans;
}
|
|
|
|
// Forwards to the underlying document, replacing the span stored at `index` with `span`.
void SourceHighlighterClient::set_span_at_index(size_t index, Syntax::TextDocumentSpan span)
{
    auto& target_document = document();
    target_document.set_span_at_index(index, span);
}
|
|
|
|
// Returns a mutable reference to the folding regions stored on the underlying document.
Vector<Syntax::TextDocumentFoldingRegion>& SourceHighlighterClient::folding_regions()
{
    auto& document_regions = document().folding_regions();
    return document_regions;
}
|
|
|
|
// Returns a read-only reference to the folding regions stored on the underlying document.
Vector<Syntax::TextDocumentFoldingRegion> const& SourceHighlighterClient::folding_regions() const
{
    auto const& document_regions = document().folding_regions();
    return document_regions;
}
|
|
|
|
// Highlighter callback: hands the highlighter the text of the underlying document.
ByteString SourceHighlighterClient::highlighter_did_request_text() const
{
    auto const& source_document = document();
    return source_document.text();
}
|
|
|
|
// Highlighter callback: intentionally does nothing.
void SourceHighlighterClient::highlighter_did_request_update()
{
    // No-op
}
|
|
|
|
// Highlighter callback: exposes the underlying document to the highlighter.
Syntax::Document& SourceHighlighterClient::highlighter_did_request_document()
{
    auto& source_document = document();
    return source_document;
}
|
|
|
|
// Highlighter callback: there is no cursor here, so a default-constructed position is returned.
Syntax::TextPosition SourceHighlighterClient::highlighter_did_request_cursor() const
{
    return Syntax::TextPosition {};
}
|
|
|
|
// Highlighter callback: stores the freshly computed `spans` on the underlying document under
// `span_collection_index`.
void SourceHighlighterClient::highlighter_did_set_spans(Vector<Syntax::TextDocumentSpan> spans)
{
    auto& target_document = document();
    target_document.set_spans(span_collection_index, move(spans));
}
|
|
|
|
// Highlighter callback: stores the freshly computed `folding_regions` on the underlying document.
void SourceHighlighterClient::highlighter_did_set_folding_regions(Vector<Syntax::TextDocumentFoldingRegion> folding_regions)
{
    auto& target_document = document();
    target_document.set_folding_regions(move(folding_regions));
}
|
|
|
|
// Convenience entry point: highlights `source` as `language` and returns the result as HTML.
// `url`, `base_url` and `mode` are passed straight through to SourceHighlighterClient::to_html_string().
String highlight_source(Optional<URL::URL> const& url, URL::URL const& base_url, String const& source, Syntax::Language language, HighlightOutputMode mode)
{
    return SourceHighlighterClient { source, language }.to_html_string(url, base_url, mode);
}
|
|
|
|
// Maps a highlighter token type to the CSS class name used for that token's <span> in the generated HTML.
//
// `token_type` is interpreted according to the language of the attached highlighter. For HTML the value may
// also be a JS or CSS token type that has been offset by JS_TOKEN_START_VALUE or CSS_TOKEN_START_VALUE.
//
// Returns an empty view for token types without a dedicated class, and "unknown" when there is no highlighter
// or its language is not CSS, JavaScript or HTML.
StringView SourceHighlighterClient::class_for_token(u64 token_type) const
{
    // The constructor only creates a highlighter for supported languages, so it may be null here.
    if (!m_highlighter)
        return "unknown"sv;

    auto class_for_css_token = [](u64 token_type) {
        switch (static_cast<Web::CSS::Parser::Token::Type>(token_type)) {
        case Web::CSS::Parser::Token::Type::Invalid:
        case Web::CSS::Parser::Token::Type::BadString:
        case Web::CSS::Parser::Token::Type::BadUrl:
            return "invalid"sv;
        case Web::CSS::Parser::Token::Type::Ident:
            return "identifier"sv;
        case Web::CSS::Parser::Token::Type::Function:
            return "function"sv;
        case Web::CSS::Parser::Token::Type::AtKeyword:
            return "at-keyword"sv;
        case Web::CSS::Parser::Token::Type::Hash:
            return "hash"sv;
        case Web::CSS::Parser::Token::Type::String:
            return "string"sv;
        case Web::CSS::Parser::Token::Type::Url:
            return "url"sv;
        case Web::CSS::Parser::Token::Type::Number:
        case Web::CSS::Parser::Token::Type::Dimension:
        case Web::CSS::Parser::Token::Type::Percentage:
            return "number"sv;
        case Web::CSS::Parser::Token::Type::Whitespace:
            return "whitespace"sv;
        case Web::CSS::Parser::Token::Type::Delim:
        case Web::CSS::Parser::Token::Type::Colon:
        case Web::CSS::Parser::Token::Type::Semicolon:
        case Web::CSS::Parser::Token::Type::Comma:
        case Web::CSS::Parser::Token::Type::OpenSquare:
        case Web::CSS::Parser::Token::Type::CloseSquare:
        case Web::CSS::Parser::Token::Type::OpenParen:
        case Web::CSS::Parser::Token::Type::CloseParen:
        case Web::CSS::Parser::Token::Type::OpenCurly:
        case Web::CSS::Parser::Token::Type::CloseCurly:
            return "delimiter"sv;
        case Web::CSS::Parser::Token::Type::CDO:
        case Web::CSS::Parser::Token::Type::CDC:
            return "comment"sv;
        case Web::CSS::Parser::Token::Type::EndOfFile:
        default:
            break;
        }
        return ""sv;
    };

    auto class_for_js_token = [](u64 token_type) {
        auto category = JS::token_category_from_packed(token_type);
        switch (category) {
        case JS::TokenCategory::Invalid:
            return "invalid"sv;
        case JS::TokenCategory::Trivia:
            return "comment"sv;
        case JS::TokenCategory::Number:
            return "number"sv;
        case JS::TokenCategory::String:
            return "string"sv;
        case JS::TokenCategory::Punctuation:
            return "punctuation"sv;
        case JS::TokenCategory::Operator:
            return "operator"sv;
        case JS::TokenCategory::Keyword:
            return "keyword"sv;
        case JS::TokenCategory::ControlKeyword:
            return "control-keyword"sv;
        case JS::TokenCategory::Identifier:
            return "identifier"sv;
        default:
            break;
        }
        return ""sv;
    };

    switch (m_highlighter->language()) {
    case Syntax::Language::CSS:
        return class_for_css_token(token_type);
    case Syntax::Language::JavaScript:
        return class_for_js_token(token_type);
    case Syntax::Language::HTML: {
        // HTML has nested CSS and JS highlighters, so we have to decode their token types.

        // HTML
        if (token_type < Web::HTML::SyntaxHighlighter::JS_TOKEN_START_VALUE) {
            switch (static_cast<Web::HTML::AugmentedTokenKind>(token_type)) {
            case Web::HTML::AugmentedTokenKind::AttributeName:
                return "attribute-name"sv;
            case Web::HTML::AugmentedTokenKind::AttributeValue:
                return "attribute-value"sv;
            case Web::HTML::AugmentedTokenKind::OpenTag:
            case Web::HTML::AugmentedTokenKind::CloseTag:
                return "tag"sv;
            case Web::HTML::AugmentedTokenKind::Comment:
                return "comment"sv;
            case Web::HTML::AugmentedTokenKind::Doctype:
                return "doctype"sv;
            case Web::HTML::AugmentedTokenKind::__Count:
            default:
                return ""sv;
            }
        }

        // JS
        if (token_type < Web::HTML::SyntaxHighlighter::CSS_TOKEN_START_VALUE) {
            return class_for_js_token(token_type - Web::HTML::SyntaxHighlighter::JS_TOKEN_START_VALUE);
        }

        // CSS
        return class_for_css_token(token_type - Web::HTML::SyntaxHighlighter::CSS_TOKEN_START_VALUE);
    }
    default:
        return "unknown"sv;
    }
}
|
|
|
|
// Serializes the highlighted document to HTML.
//
// Each source line becomes a <div class="line"> inside a <pre class="html">, and each highlighter span becomes a
// <span> whose class comes from class_for_token(). Values of `href` and `src` attributes are wrapped in an <a>
// pointing at the attribute's URL resolved against `base_url`.
//
// With HighlightOutputMode::FullDocument the <pre> is wrapped in a complete HTML document whose <title> mentions
// `url` when it is present; otherwise only the <pre> element is returned.
String SourceHighlighterClient::to_html_string(Optional<URL::URL> const& url, URL::URL const& base_url, HighlightOutputMode mode) const
{
    StringBuilder builder;

    // Text taken from the source must never be interpreted as markup by the page being generated, so the
    // characters that are significant in HTML are replaced with entity references.
    auto append_escaped = [&](Utf32View text) {
        for (auto code_point : text) {
            if (code_point == '&') {
                builder.append("&amp;"sv);
            } else if (code_point == 0xA0) {
                builder.append("&nbsp;"sv);
            } else if (code_point == '<') {
                builder.append("&lt;"sv);
            } else if (code_point == '>') {
                builder.append("&gt;"sv);
            } else {
                builder.append_code_point(code_point);
            }
        }
    };

    auto start_token = [&](u64 type) {
        builder.appendff("<span class=\"{}\">", class_for_token(type));
    };
    auto end_token = [&]() {
        builder.append("</span>"sv);
    };

    if (mode == HighlightOutputMode::FullDocument) {
        builder.append(R"~~~(
<!DOCTYPE html>
<html>
<head>
<meta name="color-scheme" content="dark light">)~~~"sv);

        if (url.has_value())
            builder.appendff("<title>View Source - {}</title>", escape_html_entities(url->serialize_for_display()));
        else
            builder.append("<title>View Source</title>"sv);

        builder.appendff("<style type=\"text/css\">{}</style>", HTML_HIGHLIGHTER_STYLE);
        builder.append(R"~~~(
</head>
<body>)~~~"sv);
    }
    builder.append("<pre class=\"html\">"sv);

    static constexpr auto href = to_array<u32>({ 'h', 'r', 'e', 'f' });
    static constexpr auto src = to_array<u32>({ 's', 'r', 'c' });
    // Set when the most recent attribute name was `href` or `src`; consulted when its value span is drawn.
    bool linkify_attribute = false;

    auto resolve_url_for_attribute = [&](Utf32View const& attribute_value) -> Optional<URL::URL> {
        if (!linkify_attribute)
            return {};

        auto attribute_url = MUST(String::formatted("{}", attribute_value));
        auto attribute_url_without_quotes = attribute_url.bytes_as_string_view().trim("\""sv);

        return Web::DOMURL::parse(attribute_url_without_quotes, base_url);
    };

    // Index of the next span to draw. It is kept across lines because a span may cover several lines.
    size_t span_index = 0;
    for (size_t line_index = 0; line_index < document().line_count(); ++line_index) {
        auto& line = document().line(line_index);
        auto line_view = line.view();
        builder.append("<div class=\"line\">"sv);

        size_t next_column = 0;

        auto draw_text_helper = [&](size_t start, size_t end, Optional<Syntax::TextDocumentSpan const&> span) {
            size_t length = end - start;
            if (length == 0)
                return;

            auto text = line_view.substring_view(start, length);

            if (span.has_value()) {
                bool append_anchor_close = false;

                if (span->data == to_underlying(Web::HTML::AugmentedTokenKind::AttributeName)) {
                    linkify_attribute = text == Utf32View { href } || text == Utf32View { src };
                } else if (span->data == to_underlying(Web::HTML::AugmentedTokenKind::AttributeValue)) {
                    if (auto resolved_url = resolve_url_for_attribute(text); resolved_url.has_value()) {
                        // The serialized URL ends up inside a double-quoted attribute. Not every URL component
                        // percent-encodes '"' (e.g. opaque paths), so escape it to keep it from terminating
                        // the attribute early.
                        auto serialized_url = MUST(String::formatted("{}", *resolved_url));
                        builder.appendff("<a href=\"{}\">", escape_html_entities(serialized_url));
                        append_anchor_close = true;
                    }
                }

                start_token(span->data);
                append_escaped(text);
                end_token();

                if (append_anchor_close)
                    builder.append("</a>"sv);
            } else {
                append_escaped(text);
            }
        };

        while (span_index < document().spans().size()) {
            auto& span = document().spans()[span_index];
            if (span.range.start().line() > line_index) {
                // No more spans in this line, moving on
                break;
            }

            // A span that started on an earlier line continues from the beginning of this one.
            size_t span_start;
            if (span.range.start().line() < line_index) {
                span_start = 0;
            } else {
                span_start = span.range.start().column();
            }

            // A span that ends on a later line covers the rest of this line and is picked up again afterwards.
            size_t span_end;
            bool span_consumed;
            if (span.range.end().line() > line_index) {
                span_end = line.length();
                span_consumed = false;
            } else {
                span_end = span.range.end().column();
                span_consumed = true;
            }

            if (span_start != next_column) {
                // Draw unspanned text between spans
                draw_text_helper(next_column, span_start, {});
            }
            draw_text_helper(span_start, span_end, span);
            next_column = span_end;
            if (!span_consumed) {
                // Continue with same span on next line
                break;
            } else {
                ++span_index;
            }
        }

        // Draw unspanned text after last span
        if (next_column < line.length()) {
            draw_text_helper(next_column, line.length(), {});
        }

        builder.append("</div>"sv);
    }

    builder.append("</pre>"sv);
    if (mode == HighlightOutputMode::FullDocument) {
        builder.append(R"~~~(
</body>
</html>
)~~~"sv);
    }

    return builder.to_string_without_validation();
}
|
|
|
|
}
|