mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-12-07 21:59:54 +00:00
LibRegex: Support matching unicode multi-character sequences
This commit is contained in:
parent
5b7c9af340
commit
a49c39de32
Notes:
github-actions[bot]
2025-11-26 10:35:48 +00:00
Author: https://github.com/aplefull
Commit: a49c39de32
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/6867
Reviewed-by: https://github.com/alimpfard ✅
7 changed files with 462 additions and 34 deletions
|
|
@ -12,7 +12,9 @@
|
|||
#include <LibUnicode/ICU.h>
|
||||
|
||||
#include <unicode/uchar.h>
|
||||
#include <unicode/uniset.h>
|
||||
#include <unicode/uscript.h>
|
||||
#include <unicode/uset.h>
|
||||
|
||||
namespace Unicode {
|
||||
|
||||
|
|
@ -321,6 +323,39 @@ bool is_ecma262_string_property(Property property)
|
|||
}
|
||||
}
|
||||
|
||||
// Expands an ECMA-262 string property into the list of strings it matches.
//
// The list holds one single-code-point string for every code point in the ICU set
// backing the property, followed by each multi-code-point string stored in that set.
// An empty list is returned when the property is not an ECMA-262 string property, or
// when the backing ICU set cannot be obtained.
Vector<String> get_property_strings(Property property)
{
    Vector<String> strings;

    if (!is_ecma262_string_property(property))
        return strings;

    UErrorCode error = U_ZERO_ERROR;
    auto const* property_set = u_getBinaryPropertySet(static_cast<UProperty>(property.value()), &error);
    if (!icu_success(error) || property_set == nullptr)
        return strings;

    auto const* set = icu::UnicodeSet::fromUSet(property_set);
    if (set == nullptr)
        return strings;

    // Individual code points are stored as inclusive [start, end] ranges; emit each member on its own.
    for (int32_t range = 0, total = set->getRangeCount(); range < total; ++range) {
        auto const last = set->getRangeEnd(range);
        for (auto code_point = set->getRangeStart(range); code_point <= last; ++code_point)
            strings.append(String::from_code_point(code_point));
    }

    // Multi-code-point sequences are kept separately from the ranges.
    for (auto const& sequence : set->strings())
        strings.append(icu_string_to_string(sequence));

    return strings;
}
|
||||
|
||||
Optional<Script> script_from_string(StringView script)
|
||||
{
|
||||
static auto script_names = []() {
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ bool code_point_has_white_space_property(u32 code_point);
|
|||
|
||||
bool is_ecma262_property(Property);
|
||||
bool is_ecma262_string_property(Property);
|
||||
// Returns every string matched by the given property: one single-code-point string per code point
// in the backing ICU set, followed by that set's multi-code-point strings. The result is empty if
// the property is not an ECMA-262 string property or the ICU set cannot be obtained.
Vector<String> get_property_strings(Property);
|
||||
|
||||
Optional<Script> script_from_string(StringView);
|
||||
bool code_point_has_script(u32 code_point, Script script);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue