2020-08-11 12:11:28 +03:00
/**************************************************************************/
/* script_iterator.cpp */
/**************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
# include "script_iterator.h"
2021-09-03 09:03:24 +03:00
// This implementation is derived from ICU: icu4c/source/extra/scrptrun/scrptrun.cpp
2025-11-11 12:05:46 +02:00
// Code points with special handling while detecting emoji sub-runs in this file.
// U+200D: never terminates an emoji sub-run that is already open.
inline constexpr UChar32 ZERO_WIDTH_JOINER = 0x200D;
// U+FE0E: when it follows an emoji/pictographic code point, is_emoji() reports `false` for that code point.
inline constexpr UChar32 VARIATION_SELECTOR_15 = 0xFE0E;
// U+FE0F: when it follows an emoji/pictographic code point, is_emoji() reports `true` for that code point.
inline constexpr UChar32 VARIATION_SELECTOR_16 = 0xFE0F;
2025-09-02 13:03:50 +03:00
inline bool ScriptIterator : : same_script ( int32_t p_script_one , int32_t p_script_two ) {
2020-08-11 12:11:28 +03:00
return p_script_one < = USCRIPT_INHERITED | | p_script_two < = USCRIPT_INHERITED | | p_script_one = = p_script_two ;
}
2025-09-02 13:03:50 +03:00
inline bool ScriptIterator : : is_emoji ( UChar32 p_c , UChar32 p_next ) {
2025-11-11 12:05:46 +02:00
if ( p_next = = VARIATION_SELECTOR_15 & & ( u_hasBinaryProperty ( p_c , UCHAR_EMOJI ) | | u_hasBinaryProperty ( p_c , UCHAR_EXTENDED_PICTOGRAPHIC ) ) ) {
2025-09-02 13:03:50 +03:00
return false ;
2025-11-11 12:05:46 +02:00
} else if ( p_next = = VARIATION_SELECTOR_16 & & ( u_hasBinaryProperty ( p_c , UCHAR_EMOJI ) | | u_hasBinaryProperty ( p_c , UCHAR_EXTENDED_PICTOGRAPHIC ) ) ) {
2025-09-02 13:03:50 +03:00
return true ;
} else {
2025-11-11 12:05:46 +02:00
return u_hasBinaryProperty ( p_c , UCHAR_EMOJI_PRESENTATION ) | | u_hasBinaryProperty ( p_c , UCHAR_EMOJI_MODIFIER ) | | u_hasBinaryProperty ( p_c , UCHAR_REGIONAL_INDICATOR ) ;
2025-09-02 13:03:50 +03:00
}
}
2020-08-11 12:11:28 +03:00
// Splits `p_string` into script runs and appends them to `script_ranges`.
//
// The scan covers the indices [p_start, p_length) of `p_string`; `p_start` is clamped into
// [0, p_length - 1] (or 0 when that range is empty). Each run covers consecutive characters
// for which same_script() holds; paired brackets take the script of their matching opener.
// Inside a run, stretches of emoji are emitted as separate ranges tagged 'Zsye', with the
// remaining text of the run emitted using the run's script.
//
// On an ICU error reported by uscript_getScript() the constructor releases its temporary
// buffers and bails out through ERR_FAIL_MSG; ranges pushed before the error are kept.
ScriptIterator::ScriptIterator(const String &p_string, int p_start, int p_length) {
	struct ParenStackEntry {
		int pair_index;
		UScriptCode script_code;
	};

	struct EmojiSubrunEntry {
		int start;
		int end;
	};

	if (p_start >= p_length) {
		p_start = p_length - 1;
	}
	if (p_start < 0) {
		p_start = 0;
	}

	// Both stacks start in fixed-size on-stack arrays and only move to the heap when they overflow.
	int paren_size = PAREN_STACK_DEPTH;
	ParenStackEntry starter_paren_stack[PAREN_STACK_DEPTH];
	ParenStackEntry *paren_stack = starter_paren_stack;

	int emoji_size = EMOJI_STACK_DEPTH;
	EmojiSubrunEntry starter_emoji_stack[EMOJI_STACK_DEPTH];
	EmojiSubrunEntry *emoji_stack = starter_emoji_stack;

	int script_start;
	int script_end = p_start;
	UScriptCode script_code;
	int paren_sp = -1;
	int start_sp = paren_sp;
	UErrorCode err = U_ZERO_ERROR;

	const char32_t *str = p_string.ptr();

	do {
		script_code = USCRIPT_COMMON;

		// Emoji sub-runs are tracked per script run; the buffer itself is reused across runs.
		int emoji_sp = -1;
		bool emoji_run = false;

		for (script_start = script_end; script_end < p_length; script_end++) {
			UChar32 ch = str[script_end];
			UChar32 n = (script_end + 1 < p_length) ? str[script_end + 1] : 0;

			if (is_emoji(ch, n)) {
				if (!emoji_run) {
					// Open a new emoji sub-run at this character.
					emoji_run = true;
					emoji_sp++;
					if (unlikely(emoji_sp >= emoji_size)) {
						emoji_size += EMOJI_STACK_DEPTH;
						if (emoji_stack == starter_emoji_stack) {
							emoji_stack = static_cast<EmojiSubrunEntry *>(memalloc(emoji_size * sizeof(EmojiSubrunEntry)));
							// The fresh heap buffer is uninitialized: carry over the sub-runs recorded so far.
							for (int i = 0; i < emoji_sp; i++) {
								emoji_stack[i] = starter_emoji_stack[i];
							}
						} else {
							emoji_stack = static_cast<EmojiSubrunEntry *>(memrealloc(emoji_stack, emoji_size * sizeof(EmojiSubrunEntry)));
						}
					}
					emoji_stack[emoji_sp].start = script_end;
					emoji_stack[emoji_sp].end = script_end;
				}
			} else if (emoji_run && ch != ZERO_WIDTH_JOINER && ch != VARIATION_SELECTOR_16 && !(u_hasBinaryProperty(ch, UCHAR_EXTENDED_PICTOGRAPHIC) && n != VARIATION_SELECTOR_15)) {
				// Joiners, emoji selectors and pictographs without a text selector keep the sub-run open; anything else closes it.
				emoji_run = false;
				emoji_stack[emoji_sp].end = script_end;
			}

			UScriptCode sc = uscript_getScript(ch, &err);
			if (U_FAILURE(err)) {
				// Release every heap buffer before bailing out.
				if (paren_stack != starter_paren_stack) {
					memfree(paren_stack);
				}
				if (emoji_stack != starter_emoji_stack) {
					memfree(emoji_stack);
				}
				ERR_FAIL_MSG(u_errorName(err));
			}

			const int32_t bracket_type = u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE);

			if (bracket_type != U_BPT_NONE) {
				if (bracket_type == U_BPT_OPEN) {
					// If it's an open character, push it onto the stack.
					paren_sp++;
					if (unlikely(paren_sp >= paren_size)) {
						// If the stack is full, allocate more space to handle deeply nested parentheses. This is unlikely to happen with any real text.
						paren_size += PAREN_STACK_DEPTH;
						if (paren_stack == starter_paren_stack) {
							paren_stack = static_cast<ParenStackEntry *>(memalloc(paren_size * sizeof(ParenStackEntry)));
							// The fresh heap buffer is uninitialized: carry over the entries already on the stack.
							for (int i = 0; i < paren_sp; i++) {
								paren_stack[i] = starter_paren_stack[i];
							}
						} else {
							paren_stack = static_cast<ParenStackEntry *>(memrealloc(paren_stack, paren_size * sizeof(ParenStackEntry)));
						}
					}
					paren_stack[paren_sp].pair_index = ch;
					paren_stack[paren_sp].script_code = script_code;
				} else if (paren_sp >= 0) {
					// If it's a close character, find the matching open on the stack, and use that script code. Any non-matching open characters above it on the stack will be popped.
					UChar32 paired_ch = u_getBidiPairedBracket(ch);
					while (paren_sp >= 0 && paren_stack[paren_sp].pair_index != paired_ch) {
						paren_sp -= 1;
					}
					if (paren_sp < start_sp) {
						start_sp = paren_sp;
					}
					if (paren_sp >= 0) {
						sc = paren_stack[paren_sp].script_code;
					}
				}
			}

			if (same_script(script_code, sc)) {
				if (script_code <= USCRIPT_INHERITED && sc > USCRIPT_INHERITED) {
					script_code = sc;
					// Now that we have a final script code, fix any open characters we pushed before we knew the script code.
					while (start_sp < paren_sp) {
						paren_stack[++start_sp].script_code = script_code;
					}
				}
				if ((bracket_type == U_BPT_CLOSE) && paren_sp >= 0) {
					// If this character is a close paired character pop the matching open character from the stack.
					paren_sp -= 1;
					if (start_sp >= 0) {
						start_sp -= 1;
					}
				}
			} else {
				break;
			}
		}

		// An emoji sub-run still open at the end of the script run ends where the run ends.
		if (emoji_run) {
			emoji_stack[emoji_sp].end = script_end;
		}

		// Emit the run: text before each emoji sub-run with the run's script, then the sub-run itself as 'Zsye'.
		for (int sub = 0; sub <= emoji_sp; sub++) {
			if (emoji_stack[sub].start > script_start) {
				ScriptRange rng;
				rng.script = hb_icu_script_to_script(script_code);
				rng.start = script_start;
				rng.end = emoji_stack[sub].start;
				script_ranges.push_back(rng);
			}
			ScriptRange rng;
			rng.script = (hb_script_t)HB_TAG('Z', 's', 'y', 'e');
			rng.start = emoji_stack[sub].start;
			rng.end = emoji_stack[sub].end;
			script_ranges.push_back(rng);

			script_start = emoji_stack[sub].end;
		}
		// Whatever remains after the last emoji sub-run.
		if (script_start != script_end) {
			ScriptRange rng;
			rng.script = hb_icu_script_to_script(script_code);
			rng.start = script_start;
			rng.end = script_end;
			script_ranges.push_back(rng);
		}
	} while (script_end < p_length);

	// Heap buffers (if any) are released exactly once, after the last run. Freeing the emoji
	// buffer inside the loop would leave `emoji_stack` dangling for the next run.
	if (emoji_stack != starter_emoji_stack) {
		memfree(emoji_stack);
	}
	if (paren_stack != starter_paren_stack) {
		memfree(paren_stack);
	}
}