ladybird/Libraries/LibJS/SourceCode.cpp
Andreas Kling 21cbfb3cb1 LibJS: Drop source ranges from bytecode source maps
Store source map locations as bytecode offset, line, and column.
Runtime consumers only emit the start line and column, so source end
positions and source text offsets do not need to be carried through
Executable source maps, bytecode cache serialization, or the Rust FFI.

Keep SourceCode's internal position cache able to track source text
offsets so callers can still translate source offsets to line and
column pairs when needed. Hash dump-bytecode IDs from the name, first
source position, and bytecode size instead of source slices that need
end offsets.

Bump the bytecode cache format version for the slimmer serialized
source map entry shape.
2026-05-14 09:41:03 +02:00

146 lines
5 KiB
C++

/*
* Copyright (c) 2022-2023, Andreas Kling <andreas@ladybird.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/BinarySearch.h>
#include <AK/Utf8View.h>
#include <LibJS/SourceCode.h>
#include <LibJS/SourceRange.h>
#include <LibJS/Token.h>
namespace JS {
NonnullRefPtr<SourceCode const> SourceCode::create(String filename, Utf16String code)
{
return adopt_ref(*new SourceCode(move(filename), move(code)));
}
SourceCode::SourceCode(String filename, Utf16String code)
: m_filename(move(filename))
, m_code(move(code))
, m_code_view(m_code.utf16_view())
, m_length_in_code_units(m_code_view.length_in_code_units())
{
}
u16 const* SourceCode::utf16_data() const
{
if (!m_code_view.has_ascii_storage())
return reinterpret_cast<u16 const*>(m_code_view.utf16_span().data());
if (m_utf16_data_cache.is_empty() && m_length_in_code_units > 0) {
auto ascii = m_code_view.ascii_span();
m_utf16_data_cache.ensure_capacity(m_length_in_code_units);
for (size_t i = 0; i < m_length_in_code_units; ++i)
m_utf16_data_cache.unchecked_append(static_cast<u16>(ascii[i]));
}
return m_utf16_data_cache.data();
}
void SourceCode::fill_position_cache() const
{
constexpr size_t predicted_minimum_cached_positions = 8;
constexpr size_t minimum_distance_between_cached_positions = 32;
constexpr size_t maximum_distance_between_cached_positions = 8192;
if (m_code.is_empty())
return;
u32 previous_code_point = 0;
u32 line = 1;
u32 column = 1;
u32 offset_of_last_starting_point = 0;
m_cached_positions.ensure_capacity(predicted_minimum_cached_positions + (m_code.length_in_code_units() / maximum_distance_between_cached_positions));
m_cached_positions.append({ .position = { .line = 1, .column = 1 }, .offset = 0 });
auto view = m_code.utf16_view();
for (auto it = view.begin(); it != view.end(); ++it) {
u32 code_point = *it;
bool is_line_terminator = code_point == '\r' || (code_point == '\n' && previous_code_point != '\r') || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR;
auto offset = view.iterator_offset(it);
VERIFY(offset <= NumericLimits<u32>::max());
bool is_nonempty_line = is_line_terminator && previous_code_point != '\n' && previous_code_point != LINE_SEPARATOR && previous_code_point != PARAGRAPH_SEPARATOR && (code_point == '\n' || previous_code_point != '\r');
auto distance_between_cached_position = offset - offset_of_last_starting_point;
if ((distance_between_cached_position >= minimum_distance_between_cached_positions && is_nonempty_line) || distance_between_cached_position >= maximum_distance_between_cached_positions) {
m_cached_positions.append({ .position = { .line = line, .column = column }, .offset = static_cast<u32>(offset) });
offset_of_last_starting_point = offset;
}
if (is_line_terminator) {
line += 1;
column = 1;
} else {
column += 1;
}
previous_code_point = code_point;
}
}
SourceRange SourceCode::range_from_offsets(u32 start_offset, [[maybe_unused]] u32 end_offset) const
{
// If the underlying code is an empty string, the range is 1,1 no matter what.
if (m_code.is_empty())
return { *this, { .line = 1, .column = 1 } };
if (m_cached_positions.is_empty())
fill_position_cache();
CachedPosition current { .position = { .line = 1, .column = 1 }, .offset = 0 };
if (!m_cached_positions.is_empty()) {
CachedPosition const dummy;
size_t nearest_index = 0;
binary_search(m_cached_positions, dummy, &nearest_index,
[&](auto&, auto& starting_point) {
return start_offset - starting_point.offset;
});
current = m_cached_positions[nearest_index];
}
Optional<Position> start;
u32 previous_code_point = 0;
auto view = m_code.utf16_view();
for (auto it = view.iterator_at_code_unit_offset(current.offset); it != view.end(); ++it) {
// If we're on or after the start offset, this is the start position.
if (!start.has_value() && view.iterator_offset(it) >= start_offset) {
start = Position {
.line = current.position.line,
.column = current.position.column,
};
}
u32 code_point = *it;
bool const is_line_terminator = code_point == '\r' || (code_point == '\n' && previous_code_point != '\r') || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR;
previous_code_point = code_point;
if (is_line_terminator) {
current.position.line += 1;
current.position.column = 1;
continue;
}
current.position.column += 1;
}
// If we didn't find a start position, just return 1,1.
// FIXME: This is a hack. Find a way to return the nicest possible values here.
if (!start.has_value())
return SourceRange { *this, { .line = 1, .column = 1 } };
return SourceRange { *this, *start };
}
}