[TextServer] Shape emojis as separate runs.

This commit is contained in:
Pāvels Nadtočajevs 2025-09-02 13:03:50 +03:00
parent 825ef2387f
commit ddde1c07a6
No known key found for this signature in database
GPG key ID: 8413210218EF35D2
4 changed files with 44 additions and 9 deletions

View file

@ -32,10 +32,20 @@
// This implementation is derived from ICU: icu4c/source/extra/scrptrun/scrptrun.cpp
bool ScriptIterator::same_script(int32_t p_script_one, int32_t p_script_two) {
inline bool ScriptIterator::same_script(int32_t p_script_one, int32_t p_script_two) {
return p_script_one <= USCRIPT_INHERITED || p_script_two <= USCRIPT_INHERITED || p_script_one == p_script_two;
}
inline bool ScriptIterator::is_emoji(UChar32 p_c, UChar32 p_next) {
if (p_next == 0xFE0E) { // Variation Selector-15
return false;
} else if (p_next == 0xFE0F) { // Variation Selector-16
return true;
} else {
return u_hasBinaryProperty(p_c, UCHAR_EMOJI) || u_hasBinaryProperty(p_c, UCHAR_EMOJI_PRESENTATION) || u_hasBinaryProperty(p_c, UCHAR_EMOJI_MODIFIER) || u_hasBinaryProperty(p_c, UCHAR_REGIONAL_INDICATOR) || u_hasBinaryProperty(p_c, UCHAR_EXTENDED_PICTOGRAPHIC);
}
}
ScriptIterator::ScriptIterator(const String &p_string, int p_start, int p_length) {
struct ParenStackEntry {
int pair_index;
@ -65,11 +75,16 @@ ScriptIterator::ScriptIterator(const String &p_string, int p_start, int p_length
script_code = USCRIPT_COMMON;
for (script_start = script_end; script_end < p_length; script_end++) {
UChar32 ch = str[script_end];
UChar32 n = (script_end + 1 < p_length) ? str[script_end + 1] : 0;
UScriptCode sc = uscript_getScript(ch, &err);
if (U_FAILURE(err)) {
memfree(paren_stack);
ERR_FAIL_MSG(u_errorName(err));
}
if (is_emoji(ch, n)) {
sc = USCRIPT_SYMBOLS_EMOJI;
}
if (u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE) != U_BPT_NONE) {
if (u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE) == U_BPT_OPEN) {
// If it's an open character, push it onto the stack.
@ -96,7 +111,12 @@ ScriptIterator::ScriptIterator(const String &p_string, int p_start, int p_length
}
}
if (same_script(script_code, sc)) {
if (script_code == USCRIPT_SYMBOLS_EMOJI && script_code != sc) {
UCharCategory cat = (UCharCategory)u_charType(ch);
if ((cat >= U_SPACE_SEPARATOR && cat <= U_CONTROL_CHAR) || (cat >= U_DASH_PUNCTUATION && cat <= U_OTHER_PUNCTUATION) || (cat >= U_INITIAL_PUNCTUATION && cat <= U_FINAL_PUNCTUATION)) {
break;
}
} else if (same_script(script_code, sc)) {
if (script_code <= USCRIPT_INHERITED && sc > USCRIPT_INHERITED) {
script_code = sc;
// Now that we have a final script code, fix any open characters we pushed before we knew the script code.