| 
									
										
										
										
											2023-01-08 16:33:30 -05:00
										 |  |  | /*
 | 
					
						
							| 
									
										
										
										
											2024-11-03 17:13:56 -05:00
										 |  |  |  * Copyright (c) 2023-2024, Tim Flynn <trflynn89@ladybird.org> | 
					
						
							| 
									
										
										
										
											2023-01-08 16:33:30 -05:00
										 |  |  |  * | 
					
						
							|  |  |  |  * SPDX-License-Identifier: BSD-2-Clause | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include <AK/String.h>
 | 
					
						
							|  |  |  | #include <AK/StringBuilder.h>
 | 
					
						
							| 
									
										
										
										
											2024-11-03 17:13:56 -05:00
										 |  |  | #include <LibUnicode/CharacterTypes.h>
 | 
					
						
							| 
									
										
										
										
											2024-06-23 09:14:27 -04:00
										 |  |  | #include <LibUnicode/ICU.h>
 | 
					
						
							| 
									
										
										
										
											2024-06-19 16:39:30 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | #include <unicode/bytestream.h>
 | 
					
						
							|  |  |  | #include <unicode/casemap.h>
 | 
					
						
							|  |  |  | #include <unicode/stringoptions.h>
 | 
					
						
							| 
									
										
										
										
											2024-10-30 20:36:26 +01:00
										 |  |  | #include <unicode/translit.h>
 | 
					
						
							| 
									
										
										
										
											2023-01-08 16:33:30 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | // This file contains definitions of AK::String methods which require UCD data.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | namespace AK { | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-19 16:39:30 -04:00
										 |  |  | struct ResolvedLocale { | 
					
						
							|  |  |  |     ByteString buffer; | 
					
						
							|  |  |  |     char const* locale { nullptr }; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static ResolvedLocale resolve_locale(Optional<StringView> const& locale) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     if (!locale.has_value()) | 
					
						
							|  |  |  |         return {}; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     ResolvedLocale resolved_locale; | 
					
						
							|  |  |  |     resolved_locale.buffer = *locale; | 
					
						
							|  |  |  |     resolved_locale.locale = resolved_locale.buffer.characters(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return resolved_locale; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-08 16:33:30 -05:00
										 |  |  | ErrorOr<String> String::to_lowercase(Optional<StringView> const& locale) const | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2024-06-19 16:39:30 -04:00
										 |  |  |     UErrorCode status = U_ZERO_ERROR; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     StringBuilder builder { bytes_as_string_view().length() }; | 
					
						
							|  |  |  |     icu::StringByteSink sink { &builder }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     auto resolved_locale = resolve_locale(locale); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-23 09:14:27 -04:00
										 |  |  |     icu::CaseMap::utf8ToLower(resolved_locale.locale, 0, Unicode::icu_string_piece(*this), sink, nullptr, status); | 
					
						
							|  |  |  |     if (Unicode::icu_failure(status)) | 
					
						
							| 
									
										
										
										
											2024-06-19 16:39:30 -04:00
										 |  |  |         return Error::from_string_literal("Unable to convert string to lowercase"); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-04-21 14:45:09 +02:00
										 |  |  |     return builder.to_string_without_validation(); | 
					
						
							| 
									
										
										
										
											2023-01-08 16:33:30 -05:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ErrorOr<String> String::to_uppercase(Optional<StringView> const& locale) const | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2024-06-19 16:39:30 -04:00
										 |  |  |     UErrorCode status = U_ZERO_ERROR; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     StringBuilder builder { bytes_as_string_view().length() }; | 
					
						
							|  |  |  |     icu::StringByteSink sink { &builder }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     auto resolved_locale = resolve_locale(locale); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-23 09:14:27 -04:00
										 |  |  |     icu::CaseMap::utf8ToUpper(resolved_locale.locale, 0, Unicode::icu_string_piece(*this), sink, nullptr, status); | 
					
						
							|  |  |  |     if (Unicode::icu_failure(status)) | 
					
						
							| 
									
										
										
										
											2024-06-19 16:39:30 -04:00
										 |  |  |         return Error::from_string_literal("Unable to convert string to uppercase"); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-04-21 14:45:09 +02:00
										 |  |  |     return builder.to_string_without_validation(); | 
					
						
							| 
									
										
										
										
											2023-01-08 16:33:30 -05:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-11-27 20:56:50 +13:00
										 |  |  | ErrorOr<String> String::to_titlecase(Optional<StringView> const& locale, TrailingCodePointTransformation trailing_code_point_transformation) const | 
					
						
							| 
									
										
										
										
											2023-01-16 11:28:27 -05:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2024-06-19 16:39:30 -04:00
										 |  |  |     UErrorCode status = U_ZERO_ERROR; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     StringBuilder builder { bytes_as_string_view().length() }; | 
					
						
							|  |  |  |     icu::StringByteSink sink { &builder }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     auto resolved_locale = resolve_locale(locale); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     u32 options = 0; | 
					
						
							|  |  |  |     if (trailing_code_point_transformation == TrailingCodePointTransformation::PreserveExisting) | 
					
						
							|  |  |  |         options |= U_TITLECASE_NO_LOWERCASE; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-23 09:14:27 -04:00
										 |  |  |     icu::CaseMap::utf8ToTitle(resolved_locale.locale, options, nullptr, Unicode::icu_string_piece(*this), sink, nullptr, status); | 
					
						
							|  |  |  |     if (Unicode::icu_failure(status)) | 
					
						
							| 
									
										
										
										
											2024-06-19 16:39:30 -04:00
										 |  |  |         return Error::from_string_literal("Unable to convert string to titlecase"); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-04-21 14:45:09 +02:00
										 |  |  |     return builder.to_string_without_validation(); | 
					
						
							| 
									
										
										
										
											2023-01-16 11:28:27 -05:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-30 20:36:26 +01:00
										 |  |  | ErrorOr<String> String::to_fullwidth() const | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     UErrorCode status = U_ZERO_ERROR; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     auto const transliterator = adopt_own_if_nonnull(icu::Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, status)); | 
					
						
							|  |  |  |     if (Unicode::icu_failure(status)) { | 
					
						
							|  |  |  |         return Error::from_string_literal("Unable to create transliterator"); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     auto icu_string = Unicode::icu_string(bytes_as_string_view()); | 
					
						
							|  |  |  |     transliterator->transliterate(icu_string); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return Unicode::icu_string_to_string(icu_string); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-19 16:39:30 -04:00
										 |  |  | static ErrorOr<void> build_casefold_string(StringView string, StringBuilder& builder) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     UErrorCode status = U_ZERO_ERROR; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     icu::StringByteSink sink { &builder }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-23 09:14:27 -04:00
										 |  |  |     icu::CaseMap::utf8Fold(0, Unicode::icu_string_piece(string), sink, nullptr, status); | 
					
						
							|  |  |  |     if (Unicode::icu_failure(status)) | 
					
						
							| 
									
										
										
										
											2024-06-19 16:39:30 -04:00
										 |  |  |         return Error::from_string_literal("Unable to casefold string"); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return {}; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-17 11:30:10 -05:00
										 |  |  | ErrorOr<String> String::to_casefold() const | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2024-06-19 16:39:30 -04:00
										 |  |  |     StringBuilder builder { bytes_as_string_view().length() }; | 
					
						
							|  |  |  |     TRY(build_casefold_string(*this, builder)); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-04-21 14:45:09 +02:00
										 |  |  |     return builder.to_string_without_validation(); | 
					
						
							| 
									
										
										
										
											2023-01-17 11:30:10 -05:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-03-08 13:03:04 -05:00
										 |  |  | bool String::equals_ignoring_case(String const& other) const | 
					
						
							| 
									
										
										
										
											2023-01-17 11:30:10 -05:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2024-06-19 16:39:30 -04:00
										 |  |  |     StringBuilder lhs_builder { bytes_as_string_view().length() }; | 
					
						
							|  |  |  |     if (build_casefold_string(*this, lhs_builder).is_error()) | 
					
						
							|  |  |  |         return false; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     StringBuilder rhs_builder { other.bytes_as_string_view().length() }; | 
					
						
							|  |  |  |     if (build_casefold_string(other, rhs_builder).is_error()) | 
					
						
							|  |  |  |         return false; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return lhs_builder.string_view() == rhs_builder.string_view(); | 
					
						
							| 
									
										
										
										
											2023-01-17 11:30:10 -05:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-31 15:51:40 -04:00
										 |  |  | Optional<size_t> String::find_byte_offset_ignoring_case(StringView needle, size_t from_byte_offset) const | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2024-06-19 16:39:30 -04:00
										 |  |  |     auto haystack = bytes_as_string_view().substring_view(from_byte_offset); | 
					
						
							|  |  |  |     if (haystack.is_empty()) | 
					
						
							|  |  |  |         return {}; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     StringBuilder lhs_builder { haystack.length() }; | 
					
						
							|  |  |  |     if (build_casefold_string(haystack, lhs_builder).is_error()) | 
					
						
							|  |  |  |         return {}; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     StringBuilder rhs_builder { needle.length() }; | 
					
						
							|  |  |  |     if (build_casefold_string(needle, rhs_builder).is_error()) | 
					
						
							|  |  |  |         return false; | 
					
						
							| 
									
										
										
										
											2024-05-31 15:51:40 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-19 16:39:30 -04:00
										 |  |  |     if (auto index = lhs_builder.string_view().find(rhs_builder.string_view()); index.has_value()) | 
					
						
							| 
									
										
										
										
											2024-05-31 15:51:40 -04:00
										 |  |  |         return *index + from_byte_offset; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return {}; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-11-03 17:13:56 -05:00
										 |  |  | ErrorOr<String> String::trim_whitespace(TrimMode mode) const | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     auto code_points = this->code_points(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Optional<size_t> start; | 
					
						
							|  |  |  |     size_t length = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for (auto it = code_points.begin(); it != code_points.end(); ++it) { | 
					
						
							|  |  |  |         if (Unicode::code_point_has_white_space_property(*it)) | 
					
						
							|  |  |  |             continue; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         auto offset = code_points.byte_offset_of(it); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if (!start.has_value()) | 
					
						
							|  |  |  |             start = offset; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         length = offset + it.underlying_code_point_length_in_bytes(); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (mode == TrimMode::Right) | 
					
						
							|  |  |  |         start = 0; | 
					
						
							|  |  |  |     if (mode == TrimMode::Left) | 
					
						
							|  |  |  |         length = bytes_as_string_view().length(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (!start.has_value() || start == length) | 
					
						
							|  |  |  |         return String {}; | 
					
						
							|  |  |  |     if (start == 0uz && length == bytes_as_string_view().length()) | 
					
						
							|  |  |  |         return *this; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return substring_from_byte_offset_with_shared_superstring(*start, length - *start); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-08 16:33:30 -05:00
										 |  |  | } |