mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-12-08 06:09:58 +00:00
LibJS: Cache length-in-code-units in SourceCode
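This avoids repeating some bit twiddling every time the lexer accesses the source length in code units: the length is now computed once in the SourceCode constructor and stored in m_length_in_code_units.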
This commit is contained in:
parent
7c7a035347
commit
201803f601
Notes:
github-actions[bot]
2025-11-09 11:15:02 +00:00
Author: https://github.com/awesomekling
Commit: 201803f601
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/6764
3 changed files with 16 additions and 13 deletions
|
|
@ -280,16 +280,16 @@ Lexer::Lexer(NonnullRefPtr<SourceCode const> source_code, size_t line_number, si
|
|||
void Lexer::consume()
|
||||
{
|
||||
auto did_reach_eof = [this] {
|
||||
if (m_position < source().length_in_code_units())
|
||||
if (m_position < source_code().length_in_code_units())
|
||||
return false;
|
||||
m_eof = true;
|
||||
m_current_code_unit = '\0';
|
||||
m_position = source().length_in_code_units() + 1;
|
||||
m_position = source_code().length_in_code_units() + 1;
|
||||
m_line_column++;
|
||||
return true;
|
||||
};
|
||||
|
||||
if (m_position > source().length_in_code_units())
|
||||
if (m_position > source_code().length_in_code_units())
|
||||
return;
|
||||
|
||||
if (did_reach_eof())
|
||||
|
|
@ -325,7 +325,7 @@ void Lexer::consume()
|
|||
dbgln_if(LEXER_DEBUG, "Previous was CR, this is LF - not incrementing line number again.");
|
||||
}
|
||||
} else {
|
||||
if (AK::UnicodeUtils::is_utf16_high_surrogate(m_current_code_unit) && m_position < source().length_in_code_units()) {
|
||||
if (AK::UnicodeUtils::is_utf16_high_surrogate(m_current_code_unit) && m_position < source_code().length_in_code_units()) {
|
||||
if (AK::UnicodeUtils::is_utf16_low_surrogate(source().code_unit_at(m_position))) {
|
||||
++m_position;
|
||||
|
||||
|
|
@ -412,7 +412,7 @@ bool Lexer::consume_binary_number()
|
|||
template<typename Callback>
|
||||
bool Lexer::match_numeric_literal_separator_followed_by(Callback callback) const
|
||||
{
|
||||
if (m_position >= source().length_in_code_units())
|
||||
if (m_position >= source_code().length_in_code_units())
|
||||
return false;
|
||||
return m_current_code_unit == '_'
|
||||
&& callback(source().code_unit_at(m_position));
|
||||
|
|
@ -420,7 +420,7 @@ bool Lexer::match_numeric_literal_separator_followed_by(Callback callback) const
|
|||
|
||||
bool Lexer::match(char16_t a, char16_t b) const
|
||||
{
|
||||
if (m_position >= source().length_in_code_units())
|
||||
if (m_position >= source_code().length_in_code_units())
|
||||
return false;
|
||||
|
||||
return m_current_code_unit == a
|
||||
|
|
@ -429,7 +429,7 @@ bool Lexer::match(char16_t a, char16_t b) const
|
|||
|
||||
bool Lexer::match(char16_t a, char16_t b, char16_t c) const
|
||||
{
|
||||
if (m_position + 1 >= source().length_in_code_units())
|
||||
if (m_position + 1 >= source_code().length_in_code_units())
|
||||
return false;
|
||||
|
||||
return m_current_code_unit == a
|
||||
|
|
@ -439,7 +439,7 @@ bool Lexer::match(char16_t a, char16_t b, char16_t c) const
|
|||
|
||||
bool Lexer::match(char16_t a, char16_t b, char16_t c, char16_t d) const
|
||||
{
|
||||
if (m_position + 2 >= source().length_in_code_units())
|
||||
if (m_position + 2 >= source_code().length_in_code_units())
|
||||
return false;
|
||||
|
||||
return m_current_code_unit == a
|
||||
|
|
@ -591,7 +591,7 @@ bool Lexer::is_block_comment_end() const
|
|||
|
||||
bool Lexer::is_numeric_literal_start() const
|
||||
{
|
||||
return is_ascii_digit(m_current_code_unit) || (m_current_code_unit == '.' && m_position < source().length_in_code_units() && is_ascii_digit(source().code_unit_at(m_position)));
|
||||
return is_ascii_digit(m_current_code_unit) || (m_current_code_unit == '.' && m_position < source_code().length_in_code_units() && is_ascii_digit(source().code_unit_at(m_position)));
|
||||
}
|
||||
|
||||
bool Lexer::slash_means_division() const
|
||||
|
|
@ -837,7 +837,7 @@ Token const& Lexer::next()
|
|||
while (m_current_code_unit != stop_char && m_current_code_unit != '\r' && m_current_code_unit != '\n' && !is_eof()) {
|
||||
if (m_current_code_unit == '\\') {
|
||||
consume();
|
||||
if (m_current_code_unit == '\r' && m_position < source().length_in_code_units() && source().code_unit_at(m_position) == '\n') {
|
||||
if (m_current_code_unit == '\r' && m_position < source_code().length_in_code_units() && source().code_unit_at(m_position) == '\n') {
|
||||
consume();
|
||||
}
|
||||
}
|
||||
|
|
@ -872,7 +872,7 @@ Token const& Lexer::next()
|
|||
consume();
|
||||
}
|
||||
|
||||
if (!found_token && m_position + 1 < source().length_in_code_units()) {
|
||||
if (!found_token && m_position + 1 < source_code().length_in_code_units()) {
|
||||
auto three_chars_view = source().substring_view(m_position - 1, 3);
|
||||
if (auto type = parse_three_char_token(three_chars_view); type != TokenType::Invalid) {
|
||||
found_token = true;
|
||||
|
|
@ -883,11 +883,11 @@ Token const& Lexer::next()
|
|||
}
|
||||
}
|
||||
|
||||
if (!found_token && m_position < source().length_in_code_units()) {
|
||||
if (!found_token && m_position < source_code().length_in_code_units()) {
|
||||
auto two_chars_view = source().substring_view(m_position - 1, 2);
|
||||
if (auto type = parse_two_char_token(two_chars_view); type != TokenType::Invalid) {
|
||||
// OptionalChainingPunctuator :: ?. [lookahead ∉ DecimalDigit]
|
||||
if (!(type == TokenType::QuestionMarkPeriod && m_position + 1 < source().length_in_code_units() && is_ascii_digit(source().code_unit_at(m_position + 1)))) {
|
||||
if (!(type == TokenType::QuestionMarkPeriod && m_position + 1 < source_code().length_in_code_units() && is_ascii_digit(source().code_unit_at(m_position + 1)))) {
|
||||
found_token = true;
|
||||
token_type = type;
|
||||
consume();
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ SourceCode::SourceCode(String filename, Utf16String code)
|
|||
: m_filename(move(filename))
|
||||
, m_code(move(code))
|
||||
, m_code_view(m_code.utf16_view())
|
||||
, m_length_in_code_units(m_code_view.length_in_code_units())
|
||||
{
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ public:
|
|||
String const& filename() const { return m_filename; }
|
||||
Utf16String const& code() const { return m_code; }
|
||||
Utf16View const& code_view() const { return m_code_view; }
|
||||
size_t length_in_code_units() const { return m_length_in_code_units; }
|
||||
|
||||
SourceRange range_from_offsets(u32 start_offset, u32 end_offset) const;
|
||||
|
||||
|
|
@ -31,6 +32,7 @@ private:
|
|||
String m_filename;
|
||||
Utf16String m_code;
|
||||
Utf16View m_code_view;
|
||||
size_t m_length_in_code_units { 0 };
|
||||
|
||||
// For fast mapping of offsets to line/column numbers, we build a list of
|
||||
// starting points (with byte offsets into the source string) and which
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue