diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp
index 45d8497af81..359b7350490 100644
--- a/core/string/ustring.cpp
+++ b/core/string/ustring.cpp
@@ -166,11 +166,13 @@ void String::append_latin1(const Span &p_cstr) {
 	*dst = 0;
 }
 
-void String::append_utf32(const Span &p_cstr) {
+Error String::append_utf32(const Span &p_cstr) {
 	if (p_cstr.is_empty()) {
-		return;
+		return OK;
 	}
 
+	Error error = OK;
+
 	const int prev_length = length();
 	resize_uninitialized(prev_length + p_cstr.size() + 1);
 	const char32_t *src = p_cstr.ptr();
@@ -184,29 +186,36 @@ void String::append_utf32(const Span &p_cstr) {
 			// NUL in string is allowed by the unicode standard, but unsupported in our implementation right now.
 			print_unicode_error("Unexpected NUL character", true);
 			*dst = _replacement_char;
+			error = ERR_PARSE_ERROR;
 		} else if (unlikely((chr & 0xfffff800) == 0xd800)) {
 			print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)chr), true);
 			*dst = _replacement_char;
+			error = ERR_PARSE_ERROR;
 		} else if (unlikely(chr > 0x10ffff)) {
 			print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)chr), true);
 			*dst = _replacement_char;
+			error = ERR_PARSE_ERROR;
 		} else {
 			*dst = chr;
 		}
 	}
 	*dst = 0;
+	return error;
 }
 
-// assumes the following have already been validated:
-// p_char != nullptr
-// p_length > 0
-// p_length <= p_char strlen
-// p_char is a valid UTF32 string
-void String::copy_from_unchecked(const char32_t *p_char, const int p_length) {
-	resize_uninitialized(p_length + 1); // + 1 for \0
-	char32_t *dst = ptrw();
-	memcpy(dst, p_char, p_length * sizeof(char32_t));
-	*(dst + p_length) = _null;
+// Appends p_span verbatim, without validating any code point.
+// The caller must guarantee that p_span holds valid UTF-32 with no embedded NUL.
+void String::append_utf32_unchecked(const Span &p_span) {
+	if (p_span.is_empty()) {
+		// Nothing to append; also avoids calling memcpy() with a possibly-null source pointer.
+		return;
+	}
+
+	const int prev_length = length();
+	resize_uninitialized(prev_length + p_span.size() + 1); // + 1 for \0
+	char32_t *dst = ptrw() + prev_length;
+	memcpy(dst, p_span.ptr(), p_span.size() * sizeof(char32_t));
+	*(dst + p_span.size()) = _null;
 }
 
 String String::operator+(const String &p_str) const {
@@ -266,7 +275,7 @@ String &String::operator+=(const String &p_str) {
 		*this = p_str;
 		return *this;
 	}
-	append_utf32(p_str);
+	append_utf32_unchecked(p_str);
 	return *this;
 }
 
@@ -3048,7 +3057,7 @@ String String::substr(int p_from, int p_chars) const {
 	}
 
 	String s;
-	s.copy_from_unchecked(&get_data()[p_from], p_chars);
+	s.append_utf32_unchecked(Span(ptr() + p_from, p_chars));
 	return s;
 }
 
@@ -4225,7 +4234,7 @@ String String::left(int p_len) const {
 	}
 
 	String s;
-	s.copy_from_unchecked(&get_data()[0], p_len);
+	s.append_utf32_unchecked(Span(ptr(), p_len));
 	return s;
 }
 
@@ -4243,7 +4252,7 @@ String String::right(int p_len) const {
 	}
 
 	String s;
-	s.copy_from_unchecked(&get_data()[length() - p_len], p_len);
+	s.append_utf32_unchecked(Span(ptr() + length() - p_len, p_len));
 	return s;
 }
 
diff --git a/core/string/ustring.h b/core/string/ustring.h
index 9bcc68064f8..596fdd2e203 100644
--- a/core/string/ustring.h
+++ b/core/string/ustring.h
@@ -268,9 +268,6 @@ class [[nodiscard]] String {
 	static constexpr char32_t _null = 0;
 	static constexpr char32_t _replacement_char = 0xfffd;
 
-	// Known-length copy.
-	void copy_from_unchecked(const char32_t *p_char, int p_length);
-
 	// NULL-terminated c string copy - automatically parse the string to find the length.
 	void append_latin1(const char *p_cstr) {
 		append_latin1(Span(p_cstr, p_cstr ? strlen(p_cstr) : 0));
@@ -278,17 +275,6 @@ class [[nodiscard]] String {
 	void append_utf32(const char32_t *p_cstr) {
 		append_utf32(Span(p_cstr, p_cstr ? strlen(p_cstr) : 0));
 	}
-
-	// wchar_t copy_from depends on the platform.
-	void append_wstring(const Span &p_cstr) {
-#ifdef WINDOWS_ENABLED
-		// wchar_t is 16-bit, parse as UTF-16
-		append_utf16((const char16_t *)p_cstr.ptr(), p_cstr.size());
-#else
-		// wchar_t is 32-bit, copy directly
-		append_utf32((Span &)p_cstr);
-#endif
-	}
 	void append_wstring(const wchar_t *p_cstr) {
 #ifdef WINDOWS_ENABLED
 		// wchar_t is 16-bit, parse as UTF-16
@@ -562,13 +548,40 @@ public:
 	}
 	static String utf16(const Span &p_range) { return utf16(p_range.ptr(), p_range.size()); }
 
-	void append_utf32(const Span &p_cstr);
+	// wchar_t copy_from depends on the platform.
+	Error append_wstring(const Span &p_cstr) {
+#ifdef WINDOWS_ENABLED
+		// wchar_t is 16-bit, parse as UTF-16
+		return append_utf16((const char16_t *)p_cstr.ptr(), p_cstr.size());
+#else
+		// wchar_t is 32-bit, copy directly
+		return append_utf32((Span &)p_cstr);
+#endif
+	}
+	static String wstring(const Span &p_string) {
+		String string;
+		string.append_wstring(p_string);
+		return string;
+	}
+
+	// Appends p_cstr, writing the replacement character in place of each invalid code point
+	// (NUL, unpaired surrogate, or above 0x10ffff). Returns ERR_PARSE_ERROR if any was replaced, OK otherwise.
+	Error append_utf32(const Span &p_cstr);
 	static String utf32(const Span &p_span) {
 		String string;
 		string.append_utf32(p_span);
 		return string;
 	}
 
+	// Like append_utf32, but skips code point validation (and is thus faster).
+	// Only use this for trusted UTF-32 input. Appending an empty span is a no-op.
+	void append_utf32_unchecked(const Span &p_span);
+	static String utf32_unchecked(const Span &p_string) {
+		String string;
+		string.append_utf32_unchecked(p_string);
+		return string;
+	}
+
 	static uint32_t hash(const char32_t *p_cstr, int p_len); /* hash the string */
 	static uint32_t hash(const char32_t *p_cstr); /* hash the string */
 	static uint32_t hash(const wchar_t *p_cstr, int p_len); /* hash the string */