mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-12-08 06:09:58 +00:00
LibRegex: Properly track code units in u-v modes
Previously, both string_position and view_index used code unit offsets regardless of mode. Now, in Unicode mode (the u and v flags), these variables track code point positions, while string_position_in_code_units is properly updated to reflect code unit offsets.
This commit is contained in:
parent
fb258639d1
commit
5632a52531
Notes:
github-actions[bot]
2025-10-24 19:24:41 +00:00
Author: https://github.com/aplefull
Commit: 5632a52531
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/6567
2 changed files with 51 additions and 3 deletions
|
|
@ -156,3 +156,37 @@ test("Unicode properties of strings", () => {
|
|||
expect(re.test(str)).toBeFalse();
|
||||
}
|
||||
});
|
||||
|
||||
test("Unicode matching with u and v flags", () => {
|
||||
const text = "𠮷a𠮷b𠮷";
|
||||
const complexText = "a\u{20BB7}b\u{10FFFF}c";
|
||||
|
||||
const cases = [
|
||||
{ pattern: /𠮷/, match: text, expected: ["𠮷"] },
|
||||
{ pattern: /𠮷/u, match: text, expected: ["𠮷"] },
|
||||
{ pattern: /𠮷/v, match: text, expected: ["𠮷"] },
|
||||
{ pattern: /\p{Script=Han}/u, match: text, expected: ["𠮷"] },
|
||||
{ pattern: /\p{Script=Han}/v, match: text, expected: ["𠮷"] },
|
||||
{ pattern: /./u, match: text, expected: ["𠮷"] },
|
||||
{ pattern: /./v, match: text, expected: ["𠮷"] },
|
||||
{ pattern: /\p{ASCII}/u, match: text, expected: ["a"] },
|
||||
{ pattern: /\p{ASCII}/v, match: text, expected: ["a"] },
|
||||
{ pattern: /x/u, match: text, expected: null },
|
||||
{ pattern: /x/v, match: text, expected: null },
|
||||
{ pattern: /\p{Script=Han}(.)/gu, match: text, expected: ["𠮷a", "𠮷b"] },
|
||||
{ pattern: /\p{Script=Han}(.)/gv, match: text, expected: ["𠮷a", "𠮷b"] },
|
||||
{ pattern: /\P{ASCII}/u, match: complexText, expected: ["\u{20BB7}"] },
|
||||
{ pattern: /\P{ASCII}/v, match: complexText, expected: ["\u{20BB7}"] },
|
||||
{ pattern: /\P{ASCII}/gu, match: complexText, expected: ["\u{20BB7}", "\u{10FFFF}"] },
|
||||
{ pattern: /\P{ASCII}/gv, match: complexText, expected: ["\u{20BB7}", "\u{10FFFF}"] },
|
||||
{ pattern: /./gu, match: text, expected: ["𠮷", "a", "𠮷", "b", "𠮷"] },
|
||||
{ pattern: /./gv, match: text, expected: ["𠮷", "a", "𠮷", "b", "𠮷"] },
|
||||
{ pattern: /(?:)/gu, match: text, expected: ["", "", "", "", "", ""] },
|
||||
{ pattern: /(?:)/gv, match: text, expected: ["", "", "", "", "", ""] },
|
||||
];
|
||||
|
||||
for (const test of cases) {
|
||||
const result = test.match.match(test.pattern);
|
||||
expect(result).toEqual(test.expected);
|
||||
}
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue