Expose copy_from_unchecked as append_utf32_unchecked and String::utf32_unchecked for high-performance copies of trusted UTF-32 strings. Expose append_wstring and String::wstring for platform (wchar_t) strings.

This commit is contained in:
Lukas Tenbrink 2025-03-29 23:27:21 +01:00
parent 8b4b93a82e
commit d1fd42bf3c
2 changed files with 44 additions and 31 deletions

View file

@ -166,11 +166,13 @@ void String::append_latin1(const Span<char> &p_cstr) {
*dst = 0; *dst = 0;
} }
void String::append_utf32(const Span<char32_t> &p_cstr) { Error String::append_utf32(const Span<char32_t> &p_cstr) {
if (p_cstr.is_empty()) { if (p_cstr.is_empty()) {
return; return OK;
} }
Error error = OK;
const int prev_length = length(); const int prev_length = length();
resize_uninitialized(prev_length + p_cstr.size() + 1); resize_uninitialized(prev_length + p_cstr.size() + 1);
const char32_t *src = p_cstr.ptr(); const char32_t *src = p_cstr.ptr();
@ -184,29 +186,29 @@ void String::append_utf32(const Span<char32_t> &p_cstr) {
// NUL in string is allowed by the unicode standard, but unsupported in our implementation right now. // NUL in string is allowed by the unicode standard, but unsupported in our implementation right now.
print_unicode_error("Unexpected NUL character", true); print_unicode_error("Unexpected NUL character", true);
*dst = _replacement_char; *dst = _replacement_char;
error = ERR_PARSE_ERROR;
} else if (unlikely((chr & 0xfffff800) == 0xd800)) { } else if (unlikely((chr & 0xfffff800) == 0xd800)) {
print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)chr), true); print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)chr), true);
*dst = _replacement_char; *dst = _replacement_char;
error = ERR_PARSE_ERROR;
} else if (unlikely(chr > 0x10ffff)) { } else if (unlikely(chr > 0x10ffff)) {
print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)chr), true); print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)chr), true);
*dst = _replacement_char; *dst = _replacement_char;
error = ERR_PARSE_ERROR;
} else { } else {
*dst = chr; *dst = chr;
} }
} }
*dst = 0; *dst = 0;
return error;
} }
// assumes the following have already been validated: void String::append_utf32_unchecked(const Span<char32_t> &p_span) {
// p_char != nullptr const int prev_length = length();
// p_length > 0 resize_uninitialized(prev_length + p_span.size() + 1); // + 1 for \0
// p_length <= p_char strlen char32_t *dst = ptrw() + prev_length;
// p_char is a valid UTF32 string memcpy(dst, p_span.ptr(), p_span.size() * sizeof(char32_t));
void String::copy_from_unchecked(const char32_t *p_char, const int p_length) { *(dst + p_span.size()) = _null;
resize_uninitialized(p_length + 1); // + 1 for \0
char32_t *dst = ptrw();
memcpy(dst, p_char, p_length * sizeof(char32_t));
*(dst + p_length) = _null;
} }
String String::operator+(const String &p_str) const { String String::operator+(const String &p_str) const {
@ -266,7 +268,7 @@ String &String::operator+=(const String &p_str) {
*this = p_str; *this = p_str;
return *this; return *this;
} }
append_utf32(p_str); append_utf32_unchecked(p_str);
return *this; return *this;
} }
@ -3048,7 +3050,7 @@ String String::substr(int p_from, int p_chars) const {
} }
String s; String s;
s.copy_from_unchecked(&get_data()[p_from], p_chars); s.append_utf32_unchecked(Span(ptr() + p_from, p_chars));
return s; return s;
} }
@ -4225,7 +4227,7 @@ String String::left(int p_len) const {
} }
String s; String s;
s.copy_from_unchecked(&get_data()[0], p_len); s.append_utf32_unchecked(Span(ptr(), p_len));
return s; return s;
} }
@ -4243,7 +4245,7 @@ String String::right(int p_len) const {
} }
String s; String s;
s.copy_from_unchecked(&get_data()[length() - p_len], p_len); s.append_utf32_unchecked(Span(ptr() + length() - p_len, p_len));
return s; return s;
} }

View file

@ -268,9 +268,6 @@ class [[nodiscard]] String {
static constexpr char32_t _null = 0; static constexpr char32_t _null = 0;
static constexpr char32_t _replacement_char = 0xfffd; static constexpr char32_t _replacement_char = 0xfffd;
// Known-length copy.
void copy_from_unchecked(const char32_t *p_char, int p_length);
// NULL-terminated c string copy - automatically parse the string to find the length. // NULL-terminated c string copy - automatically parse the string to find the length.
void append_latin1(const char *p_cstr) { void append_latin1(const char *p_cstr) {
append_latin1(Span(p_cstr, p_cstr ? strlen(p_cstr) : 0)); append_latin1(Span(p_cstr, p_cstr ? strlen(p_cstr) : 0));
@ -278,17 +275,6 @@ class [[nodiscard]] String {
void append_utf32(const char32_t *p_cstr) { void append_utf32(const char32_t *p_cstr) {
append_utf32(Span(p_cstr, p_cstr ? strlen(p_cstr) : 0)); append_utf32(Span(p_cstr, p_cstr ? strlen(p_cstr) : 0));
} }
// wchar_t copy_from depends on the platform.
void append_wstring(const Span<wchar_t> &p_cstr) {
#ifdef WINDOWS_ENABLED
// wchar_t is 16-bit, parse as UTF-16
append_utf16((const char16_t *)p_cstr.ptr(), p_cstr.size());
#else
// wchar_t is 32-bit, copy directly
append_utf32((Span<char32_t> &)p_cstr);
#endif
}
void append_wstring(const wchar_t *p_cstr) { void append_wstring(const wchar_t *p_cstr) {
#ifdef WINDOWS_ENABLED #ifdef WINDOWS_ENABLED
// wchar_t is 16-bit, parse as UTF-16 // wchar_t is 16-bit, parse as UTF-16
@ -562,13 +548,38 @@ public:
} }
static String utf16(const Span<char16_t> &p_range) { return utf16(p_range.ptr(), p_range.size()); } static String utf16(const Span<char16_t> &p_range) { return utf16(p_range.ptr(), p_range.size()); }
void append_utf32(const Span<char32_t> &p_cstr); // wchar_t copy_from depends on the platform.
// Appends a platform wide-character string to this String and returns the
// Error reported by the decoder that handled it.
// The width of wchar_t differs between platforms, so the data is handed to the
// decoder that matches it.
Error append_wstring(const Span<wchar_t> &p_cstr) {
#ifdef WINDOWS_ENABLED
	// wchar_t is 16-bit, parse as UTF-16.
	return append_utf16(reinterpret_cast<const char16_t *>(p_cstr.ptr()), p_cstr.size());
#else
	// wchar_t is 32-bit, so its elements can be read as UTF-32 directly.
	static_assert(sizeof(wchar_t) == sizeof(char32_t), "wchar_t is expected to be 32-bit on this platform.");
	// Build a fresh view over the same elements instead of reinterpreting the Span
	// object itself, which would also cast away the constness of p_cstr.
	return append_utf32(Span<char32_t>(reinterpret_cast<const char32_t *>(p_cstr.ptr()), p_cstr.size()));
#endif
}
// Creates a new String holding the contents of a platform wide-character span.
// The Error returned by append_wstring is not forwarded to the caller.
static String wstring(const Span<wchar_t> &p_string) {
	String result;
	result.append_wstring(p_string);
	return result;
}
Error append_utf32(const Span<char32_t> &p_cstr);
static String utf32(const Span<char32_t> &p_span) { static String utf32(const Span<char32_t> &p_span) {
String string; String string;
string.append_utf32(p_span); string.append_utf32(p_span);
return string; return string;
} }
// Like append_utf32, but copies the input without validating it (and is thus faster).
// Only use this function for trusted strings that are already known to be valid UTF-32.
void append_utf32_unchecked(const Span<char32_t> &p_span);
// Creates a new String by appending p_string through append_utf32_unchecked,
// i.e. without validating the input.
static String utf32_unchecked(const Span<char32_t> &p_string) {
	String result;
	result.append_utf32_unchecked(p_string);
	return result;
}
static uint32_t hash(const char32_t *p_cstr, int p_len); /* hash the string */ static uint32_t hash(const char32_t *p_cstr, int p_len); /* hash the string */
static uint32_t hash(const char32_t *p_cstr); /* hash the string */ static uint32_t hash(const char32_t *p_cstr); /* hash the string */
static uint32_t hash(const wchar_t *p_cstr, int p_len); /* hash the string */ static uint32_t hash(const wchar_t *p_cstr, int p_len); /* hash the string */