ladybird/Libraries/LibJS/Runtime/Intl/Segments.cpp
Andreas Kling 8b8136b480 LibJS: Use Substring in Intl.Segmenter
Keep the primitive string that segment() creates alongside the UTF-16
buffer used by LibUnicode. Segment data objects can then return lazy
Substring instances for "segment" and reuse the original
PrimitiveString for "input" instead of copying both strings.

Add a rope-backed UTF-16 segmenter test that exercises both
containing() and iterator results.
2026-04-11 00:35:36 +02:00

42 lines
1.5 KiB
C++

/*
* Copyright (c) 2022, Idan Horowitz <idan.horowitz@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibJS/Runtime/GlobalObject.h>
#include <LibJS/Runtime/Intl/Segments.h>
#include <LibJS/Runtime/Intl/SegmentsPrototype.h>
namespace JS::Intl {
// NOTE(review): presumably emits the out-of-line GC allocator definition for Segments; the macro's expansion is not visible in this file.
GC_DEFINE_ALLOCATOR(Segments);
// 19.5.1 CreateSegmentsObject ( segmenter, string ), https://tc39.es/ecma402/#sec-createsegmentsobject
GC::Ref<Segments> Segments::create(Realm& realm, Unicode::Segmenter const& segmenter, GC::Ref<PrimitiveString> string)
{
    // Steps 1-4 are fulfilled by the Segments constructor, which selects the
    // %IntlSegmentsPrototype% intrinsic and stores both the segmenter and the string.
    // 1. Let internalSlotsList be « [[SegmentsSegmenter]], [[SegmentsString]] ».
    // 2. Let segments be OrdinaryObjectCreate(%IntlSegmentsPrototype%, internalSlotsList).
    // 3. Set segments.[[SegmentsSegmenter]] to segmenter.
    // 4. Set segments.[[SegmentsString]] to string.
    auto segments = realm.create<Segments>(realm, segmenter, move(string));

    // 5. Return segments.
    return segments;
}
// 19.5 Segments Objects, https://tc39.es/ecma402/#sec-segments-objects
// Constructs a Segments object whose prototype is the realm's intl_segments_prototype() intrinsic.
// - segmenter: cloned via clone(); this object stores the clone rather than a reference to the caller's instance.
// - string: kept as-is in m_segments_string_value, and additionally as the result of utf16_string() in m_segments_string.
// NOTE: C++ initializes members in declaration order (the header is not visible here), not in the order written below.
// Every initializer reads only the constructor parameters, so none of them depends on another member being set first.
Segments::Segments(Realm& realm, Unicode::Segmenter const& segmenter, GC::Ref<PrimitiveString> string)
: Object(ConstructWithPrototypeTag::Tag, realm.intrinsics().intl_segments_prototype())
, m_segments_segmenter(segmenter.clone())
, m_segments_string_value(string)
, m_segments_string(string->utf16_string())
{
// Hand the stored UTF-16 text to the cloned segmenter. The member (not a temporary) is passed;
// NOTE(review): presumably the segmenter may refer to this buffer afterwards, so it must live as long as the segmenter — confirm against Unicode::Segmenter.
m_segments_segmenter->set_segmented_text(m_segments_string);
}
// Reports the cells referenced by this object to the given visitor.
void Segments::visit_edges(Cell::Visitor& visitor)
{
// Let the base class report its own references first.
Base::visit_edges(visitor);
// m_segments_string_value holds the PrimitiveString passed to the constructor; report it to the visitor
// (presumably so the garbage collector keeps it alive for as long as this object is reachable).
visitor.visit(m_segments_string_value);
}
}