| 
									
										
										
										
											2021-11-27 10:53:42 -05:00
										 |  |  | /*
 | 
					
						
							| 
									
										
										
										
											2023-01-22 11:55:26 -05:00
										 |  |  |  * Copyright (c) 2021-2023, Tim Flynn <trflynn89@serenityos.org> | 
					
						
							| 
									
										
										
										
											2021-11-27 10:53:42 -05:00
										 |  |  |  * | 
					
						
							|  |  |  |  * SPDX-License-Identifier: BSD-2-Clause | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-11 10:07:45 -05:00
										 |  |  | #include <AK/CharacterTypes.h>
 | 
					
						
							| 
									
										
										
										
											2021-11-27 10:53:42 -05:00
										 |  |  | #include <AK/Utf8View.h>
 | 
					
						
							| 
									
										
										
										
											2022-09-02 12:11:30 -04:00
										 |  |  | #include <LibLocale/Locale.h>
 | 
					
						
							|  |  |  | #include <LibLocale/NumberFormat.h>
 | 
					
						
							| 
									
										
										
										
											2021-11-27 10:53:42 -05:00
										 |  |  | #include <LibUnicode/CharacterTypes.h>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #if ENABLE_UNICODE_DATA
 | 
					
						
							|  |  |  | #    include <LibUnicode/UnicodeData.h>
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-09-02 12:01:10 -04:00
										 |  |  | namespace Locale { | 
					
						
							| 
									
										
										
										
											2021-11-27 10:53:42 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-27 11:12:01 -05:00
										 |  |  | ErrorOr<Optional<StringView>> __attribute__((weak)) get_number_system_symbol(StringView, StringView, NumericSymbol) { return OptionalNone {}; } | 
					
						
							|  |  |  | ErrorOr<Optional<NumberGroupings>> __attribute__((weak)) get_number_system_groupings(StringView, StringView) { return OptionalNone {}; } | 
					
						
							|  |  |  | ErrorOr<Optional<NumberFormat>> __attribute__((weak)) get_standard_number_system_format(StringView, StringView, StandardNumberFormatType) { return OptionalNone {}; } | 
					
						
							|  |  |  | ErrorOr<Vector<NumberFormat>> __attribute__((weak)) get_compact_number_system_formats(StringView, StringView, CompactNumberFormatType) { return Vector<NumberFormat> {}; } | 
					
						
							| 
									
										
										
										
											2023-02-02 19:54:47 -05:00
										 |  |  | ErrorOr<Vector<NumberFormat>> __attribute__((weak)) get_unit_formats(StringView, StringView, Style) { return Vector<NumberFormat> {}; } | 
					
						
							| 
									
										
										
										
											2021-11-27 10:53:42 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-02-05 19:02:54 +00:00
										 |  |  | Optional<ReadonlySpan<u32>> __attribute__((weak)) get_digits_for_number_system(StringView) | 
					
						
							| 
									
										
										
										
											2022-01-11 10:07:45 -05:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2022-01-11 18:42:07 -05:00
										 |  |  |     // Fall back to "latn" digits when Unicode data generation is disabled.
 | 
					
						
							|  |  |  |     constexpr Array<u32, 10> digits { { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 } }; | 
					
						
							|  |  |  |     return digits.span(); | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2022-01-11 10:07:45 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-22 11:55:26 -05:00
										 |  |  | ErrorOr<String> replace_digits_for_number_system(StringView system, StringView number) | 
					
						
							| 
									
										
										
										
											2022-01-11 18:42:07 -05:00
										 |  |  | { | 
					
						
							|  |  |  |     auto digits = get_digits_for_number_system(system); | 
					
						
							| 
									
										
										
										
											2022-01-11 10:07:45 -05:00
										 |  |  |     if (!digits.has_value()) | 
					
						
							| 
									
										
										
										
											2022-01-11 18:42:07 -05:00
										 |  |  |         digits = get_digits_for_number_system("latn"sv); | 
					
						
							| 
									
										
										
										
											2022-01-11 10:07:45 -05:00
										 |  |  |     VERIFY(digits.has_value()); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     StringBuilder builder; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for (auto ch : number) { | 
					
						
							|  |  |  |         if (is_ascii_digit(ch)) { | 
					
						
							|  |  |  |             u32 digit = digits->at(parse_ascii_digit(ch)); | 
					
						
							| 
									
										
										
										
											2023-01-22 11:55:26 -05:00
										 |  |  |             TRY(builder.try_append_code_point(digit)); | 
					
						
							| 
									
										
										
										
											2022-01-11 10:07:45 -05:00
										 |  |  |         } else { | 
					
						
							| 
									
										
										
										
											2023-01-22 11:55:26 -05:00
										 |  |  |             TRY(builder.try_append(ch)); | 
					
						
							| 
									
										
										
										
											2022-01-11 10:07:45 -05:00
										 |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-22 11:55:26 -05:00
										 |  |  |     return builder.to_string(); | 
					
						
							| 
									
										
										
										
											2022-01-11 10:07:45 -05:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-08-25 08:53:40 -04:00
										 |  |  | #if ENABLE_UNICODE_DATA
 | 
					
						
							| 
									
										
										
										
											2022-07-20 13:52:36 -04:00
										 |  |  | static u32 last_code_point(StringView string) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     Utf8View utf8_string { string }; | 
					
						
							|  |  |  |     u32 code_point = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for (auto it = utf8_string.begin(); it != utf8_string.end(); ++it) | 
					
						
							|  |  |  |         code_point = *it; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return code_point; | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2022-08-25 08:53:40 -04:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2022-07-20 13:52:36 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-27 10:53:42 -05:00
										 |  |  | // https://www.unicode.org/reports/tr35/tr35-numbers.html#Currencies
 | 
					
						
							| 
									
										
										
										
											2023-01-22 11:55:26 -05:00
										 |  |  | ErrorOr<Optional<String>> augment_currency_format_pattern([[maybe_unused]] StringView currency_display, [[maybe_unused]] StringView base_pattern) | 
					
						
							| 
									
										
										
										
											2021-11-27 10:53:42 -05:00
										 |  |  | { | 
					
						
							|  |  |  | #if ENABLE_UNICODE_DATA
 | 
					
						
							|  |  |  |     constexpr auto number_key = "{number}"sv; | 
					
						
							|  |  |  |     constexpr auto currency_key = "{currency}"sv; | 
					
						
							|  |  |  |     constexpr auto spacing = "\u00A0"sv; // No-Break Space (NBSP)
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     auto number_index = base_pattern.find(number_key); | 
					
						
							|  |  |  |     VERIFY(number_index.has_value()); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     auto currency_index = base_pattern.find(currency_key); | 
					
						
							|  |  |  |     VERIFY(currency_index.has_value()); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Utf8View utf8_currency_display { currency_display }; | 
					
						
							| 
									
										
										
										
											2023-01-22 11:55:26 -05:00
										 |  |  |     Optional<String> currency_key_with_spacing; | 
					
						
							| 
									
										
										
										
											2021-11-27 10:53:42 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if (*number_index < *currency_index) { | 
					
						
							|  |  |  |         u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *currency_index)); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-09-02 12:01:10 -04:00
										 |  |  |         if (!Unicode::code_point_has_general_category(last_pattern_code_point, Unicode::GeneralCategory::Separator)) { | 
					
						
							| 
									
										
										
										
											2021-11-27 10:53:42 -05:00
										 |  |  |             u32 first_currency_code_point = *utf8_currency_display.begin(); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-09-02 12:01:10 -04:00
										 |  |  |             if (!Unicode::code_point_has_general_category(first_currency_code_point, Unicode::GeneralCategory::Symbol)) | 
					
						
							| 
									
										
										
										
											2023-01-22 11:55:26 -05:00
										 |  |  |                 currency_key_with_spacing = TRY(String::formatted("{}{}", spacing, currency_key)); | 
					
						
							| 
									
										
										
										
											2021-11-27 10:53:42 -05:00
										 |  |  |         } | 
					
						
							|  |  |  |     } else { | 
					
						
							|  |  |  |         u32 last_pattern_code_point = last_code_point(base_pattern.substring_view(0, *number_index)); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-09-02 12:01:10 -04:00
										 |  |  |         if (!Unicode::code_point_has_general_category(last_pattern_code_point, Unicode::GeneralCategory::Separator)) { | 
					
						
							| 
									
										
										
										
											2021-11-27 10:53:42 -05:00
										 |  |  |             u32 last_currency_code_point = last_code_point(currency_display); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-09-02 12:01:10 -04:00
										 |  |  |             if (!Unicode::code_point_has_general_category(last_currency_code_point, Unicode::GeneralCategory::Symbol)) | 
					
						
							| 
									
										
										
										
											2023-01-22 11:55:26 -05:00
										 |  |  |                 currency_key_with_spacing = TRY(String::formatted("{}{}", currency_key, spacing)); | 
					
						
							| 
									
										
										
										
											2021-11-27 10:53:42 -05:00
										 |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (currency_key_with_spacing.has_value()) | 
					
						
							| 
									
										
										
										
											2023-01-22 11:55:26 -05:00
										 |  |  |         return TRY(TRY(String::from_utf8(base_pattern)).replace(currency_key, *currency_key_with_spacing, ReplaceMode::FirstOnly)); | 
					
						
							| 
									
										
										
										
											2021-11-27 10:53:42 -05:00
										 |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-22 11:55:26 -05:00
										 |  |  |     return OptionalNone {}; | 
					
						
							| 
									
										
										
										
											2021-11-27 10:53:42 -05:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-07-20 13:52:36 -04:00
										 |  |  | // https://unicode.org/reports/tr35/tr35-numbers.html#83-range-pattern-processing
 | 
					
						
							| 
									
										
										
										
											2023-01-22 11:55:26 -05:00
										 |  |  | ErrorOr<Optional<String>> augment_range_pattern([[maybe_unused]] StringView range_separator, [[maybe_unused]] StringView lower, [[maybe_unused]] StringView upper) | 
					
						
							| 
									
										
										
										
											2022-07-20 13:52:36 -04:00
										 |  |  | { | 
					
						
							|  |  |  | #if ENABLE_UNICODE_DATA
 | 
					
						
							|  |  |  |     auto range_pattern_with_spacing = [&]() { | 
					
						
							| 
									
										
										
										
											2023-01-22 11:55:26 -05:00
										 |  |  |         return String::formatted(" {} ", range_separator); | 
					
						
							| 
									
										
										
										
											2022-07-20 13:52:36 -04:00
										 |  |  |     }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Utf8View utf8_range_separator { range_separator }; | 
					
						
							|  |  |  |     Utf8View utf8_upper { upper }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // NOTE: Our implementation does the prescribed checks backwards for simplicity.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // To determine whether to add spacing, the currently recommended heuristic is:
 | 
					
						
							|  |  |  |     // 2. If the range pattern does not contain a character having the White_Space binary Unicode property after the {0} or before the {1} placeholders.
 | 
					
						
							|  |  |  |     for (auto it = utf8_range_separator.begin(); it != utf8_range_separator.end(); ++it) { | 
					
						
							| 
									
										
										
										
											2022-09-02 12:01:10 -04:00
										 |  |  |         if (Unicode::code_point_has_property(*it, Unicode::Property::White_Space)) | 
					
						
							| 
									
										
										
										
											2023-01-22 11:55:26 -05:00
										 |  |  |             return OptionalNone {}; | 
					
						
							| 
									
										
										
										
											2022-07-20 13:52:36 -04:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // 1. If the lower string ends with a character other than a digit, or if the upper string begins with a character other than a digit.
 | 
					
						
							|  |  |  |     if (auto it = utf8_upper.begin(); it != utf8_upper.end()) { | 
					
						
							| 
									
										
										
										
											2022-09-02 12:01:10 -04:00
										 |  |  |         if (!Unicode::code_point_has_general_category(*it, Unicode::GeneralCategory::Decimal_Number)) | 
					
						
							| 
									
										
										
										
											2022-07-20 13:52:36 -04:00
										 |  |  |             return range_pattern_with_spacing(); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-09-02 12:01:10 -04:00
										 |  |  |     if (!Unicode::code_point_has_general_category(last_code_point(lower), Unicode::GeneralCategory::Decimal_Number)) | 
					
						
							| 
									
										
										
										
											2022-07-20 13:52:36 -04:00
										 |  |  |         return range_pattern_with_spacing(); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-22 11:55:26 -05:00
										 |  |  |     return OptionalNone {}; | 
					
						
							| 
									
										
										
										
											2022-07-20 13:52:36 -04:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-27 10:53:42 -05:00
										 |  |  | } |