2021-11-12 09:09:48 -05:00
/*
2022-01-31 13:07:22 -05:00
* Copyright ( c ) 2021 , Tim Flynn < trflynn89 @ serenityos . org >
2021-11-12 09:09:48 -05:00
*
* SPDX - License - Identifier : BSD - 2 - Clause
*/
# include "GeneratorUtil.h"
# include <AK/AllOf.h>
2021-11-16 09:31:15 -05:00
# include <AK/Array.h>
2021-11-12 09:09:48 -05:00
# include <AK/CharacterTypes.h>
2021-11-16 09:31:15 -05:00
# include <AK/Find.h>
2021-11-12 09:09:48 -05:00
# include <AK/Format.h>
2021-12-02 19:59:09 -05:00
# include <AK/HashFunctions.h>
2021-11-12 09:09:48 -05:00
# include <AK/HashMap.h>
# include <AK/JsonObject.h>
# include <AK/JsonParser.h>
# include <AK/JsonValue.h>
# include <AK/LexicalPath.h>
# include <AK/QuickSort.h>
# include <AK/SourceGenerator.h>
# include <AK/String.h>
# include <AK/StringBuilder.h>
2021-12-02 19:59:09 -05:00
# include <AK/Traits.h>
2021-11-15 07:42:39 -05:00
# include <AK/Utf8View.h>
2021-11-12 09:09:48 -05:00
# include <LibCore/ArgsParser.h>
# include <LibCore/DirIterator.h>
# include <LibCore/File.h>
2022-02-06 16:00:13 -05:00
# include <LibCore/Stream.h>
2021-11-12 09:09:48 -05:00
# include <LibUnicode/Locale.h>
2021-11-27 10:53:42 -05:00
# include <LibUnicode/NumberFormat.h>
2021-11-12 09:09:48 -05:00
# include <math.h>
// Integer types used to index the de-duplicated storage tables held by UnicodeLocaleData.
// Each alias is paired with a string view that spells out the same type name.
// NOTE(review): the s_*_index_type strings are not referenced in this part of the file —
// presumably they are written into the generated source; confirm against the generator code.

// Index type of UnicodeLocaleData::unique_strings.
using StringIndexType = u16 ;
constexpr auto s_string_index_type = " u16 " sv ;
2021-12-02 19:59:09 -05:00
// Index type of UnicodeLocaleData::unique_formats.
using NumberFormatIndexType = u16 ;
constexpr auto s_number_format_index_type = " u16 " sv ;
2021-12-10 16:03:17 -05:00
// Index type of UnicodeLocaleData::unique_format_lists.
using NumberFormatListIndexType = u16 ;
constexpr auto s_number_format_list_index_type = " u16 " sv ;
2021-12-11 00:37:34 -05:00
// Index type of UnicodeLocaleData::unique_symbols.
using NumericSymbolListIndexType = u8 ;
constexpr auto s_numeric_symbol_list_index_type = " u8 " sv ;
2021-12-11 08:12:07 -05:00
// Index type of UnicodeLocaleData::unique_systems.
using NumberSystemIndexType = u8 ;
constexpr auto s_number_system_index_type = " u8 " sv ;
2021-12-11 08:30:50 -05:00
// Index type of UnicodeLocaleData::unique_units.
using UnitIndexType = u16 ;
constexpr auto s_unit_index_type = " u16 " sv ;
2021-11-15 07:42:39 -05:00
// Tells parse_number_pattern how to post-process a pattern: Compact patterns additionally have
// their literal text extracted through parse_identifiers, Standard patterns do not.
enum class NumberFormatType {
    Standard,
    Compact,
};
2021-11-12 09:09:48 -05:00
// Generator-side record for a single number format. Extends Unicode::NumberFormat with the
// string-table indices of its patterns and with hashing/equality support.
// magnitude, exponent and plurality are not declared here; presumably they are inherited from
// Unicode::NumberFormat.
struct NumberFormat : public Unicode : : NumberFormat {
using Base = Unicode : : NumberFormat ;
// Maps a plurality keyword (taken from the locale JSON keys) to Base::Plurality.
// Any keyword not listed below trips VERIFY_NOT_REACHED().
static Base : : Plurality plurality_from_string ( StringView plurality )
{
if ( plurality = = " other " sv )
return Base : : Plurality : : Other ;
if ( plurality = = " 1 " sv )
return Base : : Plurality : : Single ;
if ( plurality = = " zero " sv )
return Base : : Plurality : : Zero ;
if ( plurality = = " one " sv )
return Base : : Plurality : : One ;
if ( plurality = = " two " sv )
return Base : : Plurality : : Two ;
if ( plurality = = " few " sv )
return Base : : Plurality : : Few ;
if ( plurality = = " many " sv )
return Base : : Plurality : : Many ;
VERIFY_NOT_REACHED ( ) ;
}
2021-12-02 19:59:09 -05:00
// Hash over exactly the fields that operator== compares: magnitude, exponent, plurality,
// the three pattern indices and every entry of identifier_indices (in order).
unsigned hash ( ) const
{
auto hash = pair_int_hash ( magnitude , exponent ) ;
hash = pair_int_hash ( hash , static_cast < u8 > ( plurality ) ) ;
hash = pair_int_hash ( hash , zero_format_index ) ;
hash = pair_int_hash ( hash , positive_format_index ) ;
hash = pair_int_hash ( hash , negative_format_index ) ;
for ( auto index : identifier_indices )
hash = pair_int_hash ( hash , index ) ;
return hash ;
}
// Field-by-field equality; two formats are equal only if every field matches.
bool operator = = ( NumberFormat const & other ) const
{
return ( magnitude = = other . magnitude )
& & ( exponent = = other . exponent )
& & ( plurality = = other . plurality )
& & ( zero_format_index = = other . zero_format_index )
& & ( positive_format_index = = other . positive_format_index )
& & ( negative_format_index = = other . negative_format_index )
& & ( identifier_indices = = other . identifier_indices ) ;
}
2021-11-12 09:09:48 -05:00
// Indices returned by UnicodeLocaleData::unique_strings.ensure() for the pattern used with
// zero, positive and negative values respectively.
StringIndexType zero_format_index { 0 } ;
StringIndexType positive_format_index { 0 } ;
StringIndexType negative_format_index { 0 } ;
2021-11-16 13:53:45 -05:00
// String-table indices of the literal identifiers that parse_identifiers extracted from the
// pattern; the position in this vector is the number written into the pattern placeholder.
Vector < StringIndexType > identifier_indices { } ;
2021-11-12 09:09:48 -05:00
} ;
2021-12-02 19:59:09 -05:00
// Formatter that prints a NumberFormat as a brace-enclosed list of its fields, in this order:
// magnitude, exponent, plurality (as u8), zero/positive/negative format indices, and finally a
// nested brace-enclosed list of the identifier indices.
// NOTE(review): the output looks like a C++ aggregate initializer for generated code — confirm
// against the code that consumes it.
template < >
struct AK : : Formatter < NumberFormat > : Formatter < FormatString > {
ErrorOr < void > format ( FormatBuilder & builder , NumberFormat const & format )
{
// Pre-join the identifier indices into one string so they fill a single placeholder.
StringBuilder identifier_indices ;
identifier_indices . join ( " , " sv , format . identifier_indices ) ;
return Formatter < FormatString > : : format ( builder ,
" {{ {}, {}, {}, {}, {}, {}, {{ {} }} }} " ,
format . magnitude ,
format . exponent ,
static_cast < u8 > ( format . plurality ) ,
format . zero_format_index ,
format . positive_format_index ,
format . negative_format_index ,
identifier_indices . build ( ) ) ;
}
} ;
template < >
struct AK : : Traits < NumberFormat > : public GenericTraits < NumberFormat > {
static unsigned hash ( NumberFormat const & f ) { return f . hash ( ) ; }
} ;
2021-12-10 16:03:17 -05:00
// Ordered collection of indices returned by UnicodeLocaleData::unique_formats.ensure().
using NumberFormatList = Vector<NumberFormatIndexType>;

// String-table indices of a numbering system's symbols; parse_number_systems stores each symbol
// at the position given by the underlying value of its Unicode::NumericSymbol.
using NumericSymbolList = Vector<StringIndexType>;
2021-11-12 09:09:48 -05:00
struct NumberSystem {
2021-12-11 08:12:07 -05:00
unsigned hash ( ) const
{
2022-01-11 18:50:15 -05:00
auto hash = int_hash ( symbols ) ;
2021-12-11 08:12:07 -05:00
hash = pair_int_hash ( hash , primary_grouping_size ) ;
hash = pair_int_hash ( hash , secondary_grouping_size ) ;
hash = pair_int_hash ( hash , decimal_format ) ;
hash = pair_int_hash ( hash , decimal_long_formats ) ;
hash = pair_int_hash ( hash , decimal_short_formats ) ;
hash = pair_int_hash ( hash , currency_format ) ;
hash = pair_int_hash ( hash , accounting_format ) ;
hash = pair_int_hash ( hash , currency_unit_formats ) ;
hash = pair_int_hash ( hash , currency_short_formats ) ;
hash = pair_int_hash ( hash , percent_format ) ;
hash = pair_int_hash ( hash , scientific_format ) ;
return hash ;
}
bool operator = = ( NumberSystem const & other ) const
{
2022-01-11 18:50:15 -05:00
return ( symbols = = other . symbols )
2021-12-11 08:12:07 -05:00
& & ( primary_grouping_size = = other . primary_grouping_size )
& & ( secondary_grouping_size = = other . secondary_grouping_size )
& & ( decimal_format = = other . decimal_format )
& & ( decimal_long_formats = = other . decimal_long_formats )
& & ( decimal_short_formats = = other . decimal_short_formats )
& & ( currency_format = = other . currency_format )
& & ( accounting_format = = other . accounting_format )
& & ( currency_unit_formats = = other . currency_unit_formats )
& & ( currency_short_formats = = other . currency_short_formats )
& & ( percent_format = = other . percent_format )
& & ( scientific_format = = other . scientific_format ) ;
}
2021-12-11 00:37:34 -05:00
NumericSymbolListIndexType symbols { 0 } ;
2021-11-12 09:09:48 -05:00
2021-11-13 22:03:22 -05:00
u8 primary_grouping_size { 0 } ;
u8 secondary_grouping_size { 0 } ;
2021-12-02 19:59:09 -05:00
NumberFormatIndexType decimal_format { 0 } ;
2021-12-10 16:03:17 -05:00
NumberFormatListIndexType decimal_long_formats { 0 } ;
NumberFormatListIndexType decimal_short_formats { 0 } ;
2021-11-12 09:09:48 -05:00
2021-12-02 19:59:09 -05:00
NumberFormatIndexType currency_format { 0 } ;
NumberFormatIndexType accounting_format { 0 } ;
2021-12-10 16:03:17 -05:00
NumberFormatListIndexType currency_unit_formats { 0 } ;
NumberFormatListIndexType currency_short_formats { 0 } ;
2021-11-12 09:09:48 -05:00
2021-12-02 19:59:09 -05:00
NumberFormatIndexType percent_format { 0 } ;
NumberFormatIndexType scientific_format { 0 } ;
2021-11-12 09:09:48 -05:00
} ;
2021-12-11 08:12:07 -05:00
// Formatter that prints a NumberSystem as a brace-enclosed, comma-separated list of its twelve
// members, in declaration order (symbols first, scientific_format last).
// NOTE(review): the output looks like a C++ aggregate initializer for generated code — confirm
// against the code that consumes it.
template < >
struct AK : : Formatter < NumberSystem > : Formatter < FormatString > {
ErrorOr < void > format ( FormatBuilder & builder , NumberSystem const & system )
{
return Formatter < FormatString > : : format ( builder ,
2022-01-11 18:50:15 -05:00
" {{ {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {} }} " ,
2021-12-11 08:12:07 -05:00
system . symbols ,
system . primary_grouping_size ,
system . secondary_grouping_size ,
system . decimal_format ,
system . decimal_long_formats ,
system . decimal_short_formats ,
system . currency_format ,
system . accounting_format ,
system . currency_unit_formats ,
system . currency_short_formats ,
system . percent_format ,
system . scientific_format ) ;
}
} ;
template < >
struct AK : : Traits < NumberSystem > : public GenericTraits < NumberSystem > {
static unsigned hash ( NumberSystem const & s ) { return s . hash ( ) ; }
} ;
2021-11-16 09:31:15 -05:00
struct Unit {
2021-12-11 08:30:50 -05:00
unsigned hash ( ) const
{
auto hash = int_hash ( unit ) ;
hash = pair_int_hash ( hash , long_formats ) ;
hash = pair_int_hash ( hash , short_formats ) ;
hash = pair_int_hash ( hash , narrow_formats ) ;
return hash ;
}
bool operator = = ( Unit const & other ) const
{
return ( unit = = other . unit )
& & ( long_formats = = other . long_formats )
& & ( short_formats = = other . short_formats )
& & ( narrow_formats = = other . narrow_formats ) ;
}
2021-11-16 09:31:15 -05:00
StringIndexType unit { 0 } ;
2021-12-10 16:36:39 -05:00
NumberFormatListIndexType long_formats { 0 } ;
NumberFormatListIndexType short_formats { 0 } ;
NumberFormatListIndexType narrow_formats { 0 } ;
2021-11-16 09:31:15 -05:00
} ;
2021-12-11 08:30:50 -05:00
// Formatter that prints a Unit as a brace-enclosed, comma-separated list of its four members:
// unit, long_formats, short_formats, narrow_formats.
// Note that the Unit parameter is named `system` here even though it is a Unit.
template < >
struct AK : : Formatter < Unit > : Formatter < FormatString > {
ErrorOr < void > format ( FormatBuilder & builder , Unit const & system )
{
return Formatter < FormatString > : : format ( builder ,
" {{ {}, {}, {}, {} }} " ,
system . unit ,
system . long_formats ,
system . short_formats ,
system . narrow_formats ) ;
}
} ;
template < >
struct AK : : Traits < Unit > : public GenericTraits < Unit > {
static unsigned hash ( Unit const & u ) { return u . hash ( ) ; }
} ;
2021-11-12 09:09:48 -05:00
struct Locale {
2022-01-11 18:50:15 -05:00
Vector < NumberSystemIndexType > number_systems ;
2021-12-11 08:30:50 -05:00
HashMap < String , UnitIndexType > units { } ;
2022-01-27 12:40:31 -05:00
u8 minimum_grouping_digits { 0 } ;
2021-11-12 09:09:48 -05:00
} ;
struct UnicodeLocaleData {
UniqueStringStorage < StringIndexType > unique_strings ;
2021-12-02 19:59:09 -05:00
UniqueStorage < NumberFormat , NumberFormatIndexType > unique_formats ;
2021-12-10 16:03:17 -05:00
UniqueStorage < NumberFormatList , NumberFormatListIndexType > unique_format_lists ;
2021-12-11 00:37:34 -05:00
UniqueStorage < NumericSymbolList , NumericSymbolListIndexType > unique_symbols ;
2021-12-11 08:12:07 -05:00
UniqueStorage < NumberSystem , NumberSystemIndexType > unique_systems ;
2021-12-11 08:30:50 -05:00
UniqueStorage < Unit , UnitIndexType > unique_units ;
2021-12-10 16:03:17 -05:00
2022-01-11 18:42:07 -05:00
HashMap < String , Array < u32 , 10 > > number_system_digits ;
Vector < String > number_systems ;
2021-11-12 09:09:48 -05:00
HashMap < String , Locale > locales ;
2021-11-16 13:53:45 -05:00
size_t max_identifier_count { 0 } ;
2021-11-12 09:09:48 -05:00
} ;
2022-01-11 18:42:07 -05:00
// Reads the numbering systems JSON file located in the directory `core_supplemental_path` and,
// for every numbering system whose type is numeric, records:
//   - its ten digit code points in locale_data.number_system_digits, and
//   - its name in locale_data.number_systems (once, preserving first-seen order).
// Returns the error from read_json_file() if the file cannot be read or parsed. Aborts via
// VERIFY if a numeric system does not have exactly ten digit code points.
static ErrorOr < void > parse_number_system_digits ( String core_supplemental_path , UnicodeLocaleData & locale_data )
{
LexicalPath number_systems_path ( move ( core_supplemental_path ) ) ;
number_systems_path = number_systems_path . append ( " numberingSystems.json " sv ) ;
2022-02-06 16:00:13 -05:00
auto number_systems = TRY ( read_json_file ( number_systems_path . string ( ) ) ) ;
2022-01-11 18:42:07 -05:00
auto const & supplemental_object = number_systems . as_object ( ) . get ( " supplemental " sv ) ;
auto const & number_systems_object = supplemental_object . as_object ( ) . get ( " numberingSystems " sv ) ;
number_systems_object . as_object ( ) . for_each_member ( [ & ] ( auto const & number_system , auto const & digits_object ) {
// Only numeric systems are recorded; every other type is skipped.
auto type = digits_object . as_object ( ) . get ( " _type " sv ) . as_string ( ) ;
if ( type ! = " numeric " sv )
return ;
// The digits are iterated as code points (not bytes), one array slot per code point.
auto digits = digits_object . as_object ( ) . get ( " _digits " sv ) . as_string ( ) ;
Utf8View utf8_digits { digits } ;
VERIFY ( utf8_digits . length ( ) = = 10 ) ;
auto & number_system_digits = locale_data . number_system_digits . ensure ( number_system ) ;
size_t index = 0 ;
for ( u32 digit : utf8_digits )
number_system_digits [ index + + ] = digit ;
if ( ! locale_data . number_systems . contains_slow ( number_system ) )
locale_data . number_systems . append ( number_system ) ;
} ) ;
return { } ;
}
2021-11-16 13:53:45 -05:00
// Replaces every run of literal text in `pattern` (text outside of {...} placeholders that is
// not just whitespace) with a placeholder built from `replacement` and a number, and returns the
// rewritten pattern.
//
// Each literal run is stored in locale_data.unique_strings, and its string index is recorded in
// format.identifier_indices (re-using an existing entry if the same identifier was already seen
// for this format). The number written into the placeholder is the identifier's position within
// format.identifier_indices. locale_data.max_identifier_count is raised whenever a format ends
// up with more identifiers than any format before it.
static String parse_identifiers ( String pattern , StringView replacement , UnicodeLocaleData & locale_data , NumberFormat & format )
2021-11-16 10:09:17 -05:00
{
2022-01-16 20:34:09 +02:00
// Code points treated as whitespace when locating and trimming identifiers.
static constexpr Utf8View whitespace { " \u0020 \u00a0 \u200f " sv } ;
2021-11-16 10:09:17 -05:00
2021-11-16 13:53:45 -05:00
// Each iteration replaces one literal run; the loop ends once a scan finds none.
while ( true ) {
Utf8View utf8_pattern { pattern } ;
Optional < size_t > start_index ;
Optional < size_t > end_index ;
bool inside_replacement = false ;
2021-11-16 10:09:17 -05:00
2021-11-16 13:53:45 -05:00
// Scan by code point. start_index becomes the byte offset of the first non-whitespace code
// point outside of braces; end_index becomes the byte offset of the next opening brace after
// that (if there is one).
for ( auto it = utf8_pattern . begin ( ) ; it ! = utf8_pattern . end ( ) ; + + it ) {
if ( * it = = ' { ' ) {
if ( start_index . has_value ( ) ) {
end_index = utf8_pattern . byte_offset_of ( it ) ;
break ;
}
2021-11-16 10:09:17 -05:00
2021-11-16 13:53:45 -05:00
inside_replacement = true ;
} else if ( * it = = ' } ' ) {
inside_replacement = false ;
} else if ( ! inside_replacement & & ! start_index . has_value ( ) & & ! whitespace . contains ( * it ) ) {
start_index = utf8_pattern . byte_offset_of ( it ) ;
}
2021-11-16 10:09:17 -05:00
}
2021-11-16 13:53:45 -05:00
// No literal text left: the pattern consists only of placeholders and whitespace.
if ( ! start_index . has_value ( ) )
return pattern ;
// Without a following placeholder the identifier extends to the end of the pattern.
end_index = end_index . value_or ( pattern . length ( ) ) ;
utf8_pattern = utf8_pattern . substring_view ( * start_index , * end_index - * start_index ) ;
// The run starts on a non-whitespace code point, so this only removes trailing whitespace;
// that whitespace stays in the pattern (see the tail substring below).
utf8_pattern = utf8_pattern . trim ( whitespace ) ;
2021-11-16 10:09:17 -05:00
2021-11-16 13:53:45 -05:00
// Quoted periods in the identifier are stored unquoted.
auto identifier = utf8_pattern . as_string ( ) . replace ( " '.' " sv , " . " sv ) ;
auto identifier_index = locale_data . unique_strings . ensure ( move ( identifier ) ) ;
size_t replacement_index = 0 ;
2021-11-16 10:09:17 -05:00
2021-11-16 13:53:45 -05:00
if ( auto index = format . identifier_indices . find_first_index ( identifier_index ) ; index . has_value ( ) ) {
replacement_index = * index ;
} else {
replacement_index = format . identifier_indices . size ( ) ;
format . identifier_indices . append ( identifier_index ) ;
2021-11-16 10:09:17 -05:00
2021-11-16 13:53:45 -05:00
locale_data . max_identifier_count = max ( locale_data . max_identifier_count , format . identifier_indices . size ( ) ) ;
}
// Rebuild the pattern: text before the identifier, the new placeholder, then everything after
// the (trimmed) identifier.
pattern = String : : formatted ( " {}{{{}:{}}}{} " ,
* start_index > 0 ? pattern . substring_view ( 0 , * start_index ) : " " sv ,
replacement ,
replacement_index ,
pattern . substring_view ( * start_index + utf8_pattern . byte_length ( ) ) ) ;
}
2021-11-16 10:09:17 -05:00
}
2021-11-15 07:56:20 -05:00
// Converts one or two raw number patterns into the zero/positive/negative pattern strings of
// `format`, storing each resulting string in locale_data.unique_strings.
//
//   patterns  - patterns[0] is the base pattern; an optional patterns[1] is the explicit
//               negative pattern. Any other size trips the VERIFY below.
//   type      - for NumberFormatType::Compact the pattern is additionally run through
//               parse_identifiers.
//   format    - receives zero_format_index, positive_format_index and negative_format_index
//               (and identifier_indices for compact patterns).
//   number_system_for_groupings
//             - when non-null, its primary/secondary grouping sizes are derived from the
//               positions of the group separators and the decimal point in the number pattern.
static void parse_number_pattern ( Vector < String > patterns , UnicodeLocaleData & locale_data , NumberFormatType type , NumberFormat & format , NumberSystem * number_system_for_groupings = nullptr )
2021-11-12 09:09:48 -05:00
{
// https://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns
// https://cldr.unicode.org/translation/number-currency-formats/number-and-currency-patterns
2021-11-15 07:56:20 -05:00
VERIFY ( ( patterns . size ( ) = = 1 ) | | ( patterns . size ( ) = = 2 ) ) ;
2021-11-12 17:16:30 -05:00
// Rewrites a single raw pattern into its placeholder form and returns it.
auto replace_patterns = [ & ] ( String pattern ) {
2021-11-12 09:09:48 -05:00
// Pattern symbols and the named placeholders they are replaced with.
static HashMap < StringView , StringView > replacements = {
2021-11-13 10:15:33 -05:00
{ " {0} " sv , " {number} " sv } ,
{ " {1} " sv , " {currency} " sv } ,
2021-11-12 09:09:48 -05:00
{ " % " sv , " {percentSign} " sv } ,
{ " + " sv , " {plusSign} " sv } ,
{ " - " sv , " {minusSign} " sv } ,
2021-11-12 23:16:37 -05:00
{ " ¤ " sv , " {currency} " sv } , // U+00A4 Currency Sign
2021-11-14 08:35:10 -05:00
{ " E " sv , " {scientificSeparator} " sv } ,
2021-11-12 09:09:48 -05:00
} ;
2021-11-13 10:15:33 -05:00
for ( auto const & replacement : replacements )
pattern = pattern . replace ( replacement . key , replacement . value , true ) ;
2021-11-12 15:49:14 -05:00
// Locate the number portion: it starts at the first digit placeholder character and runs
// while only digit placeholders, group separators and decimal points follow.
if ( auto start_number_index = pattern . find_any_of ( " #0 " sv , String : : SearchDirection : : Forward ) ; start_number_index . has_value ( ) ) {
2021-11-12 09:09:48 -05:00
auto end_number_index = * start_number_index + 1 ;
for ( ; end_number_index < pattern . length ( ) ; + + end_number_index ) {
auto ch = pattern [ end_number_index ] ;
if ( ( ch ! = ' # ' ) & & ( ch ! = ' 0 ' ) & & ( ch ! = ' , ' ) & & ( ch ! = ' . ' ) )
break ;
}
2021-11-13 22:03:22 -05:00
if ( number_system_for_groupings ) {
// Grouping sizes are distances between separators within the number portion. This path
// requires one or two group separators and a decimal point, otherwise a VERIFY trips.
auto number_pattern = pattern . substring_view ( * start_number_index , end_number_index - * start_number_index ) ;
auto group_separators = number_pattern . find_all ( " , " sv ) ;
VERIFY ( ( group_separators . size ( ) = = 1 ) | | ( group_separators . size ( ) = = 2 ) ) ;
auto decimal = number_pattern . find ( ' . ' ) ;
VERIFY ( decimal . has_value ( ) ) ;
if ( group_separators . size ( ) = = 1 ) {
// One separator: both sizes are the digit count between it and the decimal point.
number_system_for_groupings - > primary_grouping_size = * decimal - group_separators [ 0 ] - 1 ;
number_system_for_groupings - > secondary_grouping_size = number_system_for_groupings - > primary_grouping_size ;
} else {
// Two separators: primary is measured from the last separator to the decimal point,
// secondary is the digit count between the two separators.
number_system_for_groupings - > primary_grouping_size = * decimal - group_separators [ 1 ] - 1 ;
number_system_for_groupings - > secondary_grouping_size = group_separators [ 1 ] - group_separators [ 0 ] - 1 ;
}
}
2021-11-12 09:09:48 -05:00
// Collapse the whole number portion into a single number placeholder.
pattern = String : : formatted ( " {}{{number}}{} " ,
* start_number_index > 0 ? pattern . substring_view ( 0 , * start_number_index ) : " " sv ,
pattern . substring_view ( end_number_index ) ) ;
2021-11-14 08:35:10 -05:00
// This is specifically handled here rather than in the replacements HashMap above so
// that we do not errantly replace zeroes in number patterns.
if ( pattern . contains ( * replacements . get ( " E " sv ) ) )
pattern = pattern . replace ( " 0 " sv , " {scientificExponent} " sv ) ;
2021-11-12 09:09:48 -05:00
}
2021-11-16 10:09:17 -05:00
if ( type = = NumberFormatType : : Compact )
2021-11-16 13:53:45 -05:00
return parse_identifiers ( move ( pattern ) , " compactIdentifier " sv , locale_data , format ) ;
2021-11-15 07:42:39 -05:00
2021-11-12 09:09:48 -05:00
return pattern ;
} ;
// The base pattern doubles as the zero format; the positive format is the same pattern with a
// plus-sign placeholder prepended.
auto zero_format = replace_patterns ( move ( patterns [ 0 ] ) ) ;
format . positive_format_index = locale_data . unique_strings . ensure ( String : : formatted ( " {{plusSign}}{} " , zero_format ) ) ;
// Use the explicit negative pattern when one was supplied; otherwise prepend a minus-sign
// placeholder to the base pattern.
if ( patterns . size ( ) = = 2 ) {
auto negative_format = replace_patterns ( move ( patterns [ 1 ] ) ) ;
format . negative_format_index = locale_data . unique_strings . ensure ( move ( negative_format ) ) ;
} else {
format . negative_format_index = locale_data . unique_strings . ensure ( String : : formatted ( " {{minusSign}}{} " , zero_format ) ) ;
}
// Stored last because zero_format is moved from here and is still read above.
format . zero_format_index = locale_data . unique_strings . ensure ( move ( zero_format ) ) ;
}
2021-12-02 19:59:09 -05:00
// Convenience overload: parses `patterns` into a temporary NumberFormat, stores that format in
// locale_data.unique_formats and writes the resulting index to `format_index`.
static void parse_number_pattern(Vector<String> patterns, UnicodeLocaleData& locale_data, NumberFormatType type, NumberFormatIndexType& format_index, NumberSystem* number_system_for_groupings = nullptr)
{
    NumberFormat parsed_format {};
    parse_number_pattern(move(patterns), locale_data, type, parsed_format, number_system_for_groupings);

    format_index = locale_data.unique_formats.ensure(move(parsed_format));
}
2021-11-23 11:00:27 -05:00
// Reads the numbers JSON file located in the directory `locale_numbers_path` and fills `locale`
// with one unique_systems index per known numbering system plus the minimum grouping digits.
//
// The locale's entry in the JSON is looked up by the name of the directory containing the file.
// Symbols and the decimal, currency, percent and scientific formats are collected per numbering
// system; all strings, formats, format lists, symbol lists and number systems are de-duplicated
// through the storage tables in `locale_data`.
//
// Returns the error from read_json_file() if the file cannot be read or parsed.
static ErrorOr < void > parse_number_systems ( String locale_numbers_path , UnicodeLocaleData & locale_data , Locale & locale )
2021-11-12 09:09:48 -05:00
{
LexicalPath numbers_path ( move ( locale_numbers_path ) ) ;
numbers_path = numbers_path . append ( " numbers.json " sv ) ;
2022-02-06 16:00:13 -05:00
auto numbers = TRY ( read_json_file ( numbers_path . string ( ) ) ) ;
2021-11-15 01:46:51 +01:00
auto const & main_object = numbers . as_object ( ) . get ( " main " sv ) ;
2021-11-12 09:09:48 -05:00
auto const & locale_object = main_object . as_object ( ) . get ( numbers_path . parent ( ) . basename ( ) ) ;
auto const & locale_numbers_object = locale_object . as_object ( ) . get ( " numbers " sv ) ;
2022-01-27 12:40:31 -05:00
auto const & minimum_grouping_digits = locale_numbers_object . as_object ( ) . get ( " minimumGroupingDigits " sv ) ;
2021-11-12 09:09:48 -05:00
2022-01-11 18:50:15 -05:00
// One slot per entry of locale_data.number_systems; a slot stays empty when this locale has no
// data for that numbering system.
Vector < Optional < NumberSystem > > number_systems ;
number_systems . resize ( locale_data . number_systems . size ( ) ) ;
2021-12-11 08:12:07 -05:00
2021-11-12 09:09:48 -05:00
// Returns the NumberSystem slot for the named system, creating it on first use. The name is
// expected to be present in locale_data.number_systems (value() is called unconditionally).
auto ensure_number_system = [ & ] ( auto const & system ) - > NumberSystem & {
2022-01-11 18:50:15 -05:00
auto system_index = locale_data . number_systems . find_first_index ( system ) . value ( ) ;
VERIFY ( system_index < number_systems . size ( ) ) ;
auto & number_system = number_systems . at ( system_index ) ;
if ( ! number_system . has_value ( ) )
number_system = NumberSystem { } ;
return number_system . value ( ) ;
2021-11-12 09:09:48 -05:00
} ;
// Parses every member of `format_object` whose key has exactly three dash-separated parts into
// a compact NumberFormat, and returns the unique_format_lists index of the resulting list.
auto parse_number_format = [ & ] ( auto const & format_object ) {
2021-12-02 19:59:09 -05:00
Vector < NumberFormatIndexType > result ;
2021-11-12 09:09:48 -05:00
result . ensure_capacity ( format_object . size ( ) ) ;
format_object . for_each_member ( [ & ] ( auto const & key , JsonValue const & value ) {
auto split_key = key . split_view ( ' - ' ) ;
2021-11-12 17:16:30 -05:00
if ( split_key . size ( ) ! = 3 )
return ;
2021-11-12 09:09:48 -05:00
2021-11-15 07:56:20 -05:00
auto patterns = value . as_string ( ) . split ( ' ; ' ) ;
2021-11-12 09:09:48 -05:00
NumberFormat format { } ;
2021-11-12 17:16:30 -05:00
// A numeric first key part yields the magnitude (its base-10 logarithm); otherwise the key
// must be a unit pattern key.
// NOTE(review): the VERIFY only checks divisibility by ten, and the floating-point log10()
// result is truncated by the cast — confirm the input is always an exact power of ten.
if ( auto type = split_key [ 0 ] . template to_uint < u64 > ( ) ; type . has_value ( ) ) {
VERIFY ( * type % 10 = = 0 ) ;
format . magnitude = static_cast < u8 > ( log10 ( * type ) ) ;
2021-11-15 07:56:20 -05:00
// The exponent is the magnitude reduced by the number of zero digits the pattern itself
// displays, plus one. It is left untouched when the pattern is just a single zero.
if ( patterns [ 0 ] ! = " 0 " sv ) {
auto number_of_zeroes_in_pattern = patterns [ 0 ] . count ( " 0 " sv ) ;
VERIFY ( format . magnitude > = number_of_zeroes_in_pattern ) ;
format . exponent = format . magnitude + 1 - number_of_zeroes_in_pattern ;
}
2021-11-12 17:16:30 -05:00
} else {
VERIFY ( split_key [ 0 ] = = " unitPattern " sv ) ;
}
2021-11-12 09:09:48 -05:00
// The third key part names the plurality.
format . plurality = NumberFormat : : plurality_from_string ( split_key [ 2 ] ) ;
2021-11-15 07:56:20 -05:00
parse_number_pattern ( move ( patterns ) , locale_data , NumberFormatType : : Compact , format ) ;
2021-11-12 09:09:48 -05:00
2021-12-02 19:59:09 -05:00
auto format_index = locale_data . unique_formats . ensure ( move ( format ) ) ;
result . append ( format_index ) ;
2021-11-12 09:09:48 -05:00
} ) ;
2021-12-10 16:03:17 -05:00
return locale_data . unique_format_lists . ensure ( move ( result ) ) ;
2021-11-12 09:09:48 -05:00
} ;
2021-12-11 00:37:34 -05:00
// Maps a symbol key from the JSON to Unicode::NumericSymbol; unknown keys yield an empty
// Optional so the caller can skip them.
auto numeric_symbol_from_string = [ & ] ( StringView numeric_symbol ) - > Optional < Unicode : : NumericSymbol > {
if ( numeric_symbol = = " decimal " sv )
return Unicode : : NumericSymbol : : Decimal ;
if ( numeric_symbol = = " exponential " sv )
return Unicode : : NumericSymbol : : Exponential ;
if ( numeric_symbol = = " group " sv )
return Unicode : : NumericSymbol : : Group ;
if ( numeric_symbol = = " infinity " sv )
return Unicode : : NumericSymbol : : Infinity ;
if ( numeric_symbol = = " minusSign " sv )
return Unicode : : NumericSymbol : : MinusSign ;
if ( numeric_symbol = = " nan " sv )
return Unicode : : NumericSymbol : : NaN ;
if ( numeric_symbol = = " percentSign " sv )
return Unicode : : NumericSymbol : : PercentSign ;
if ( numeric_symbol = = " plusSign " sv )
return Unicode : : NumericSymbol : : PlusSign ;
return { } ;
} ;
2021-11-12 09:09:48 -05:00
// Dispatch on the key prefix; the remainder of the key after the prefix is the numbering
// system name. Keys matching none of the prefixes are ignored.
locale_numbers_object . as_object ( ) . for_each_member ( [ & ] ( auto const & key , JsonValue const & value ) {
constexpr auto symbols_prefix = " symbols-numberSystem- " sv ;
constexpr auto decimal_formats_prefix = " decimalFormats-numberSystem- " sv ;
constexpr auto currency_formats_prefix = " currencyFormats-numberSystem- " sv ;
constexpr auto percent_formats_prefix = " percentFormats-numberSystem- " sv ;
2021-11-14 08:35:10 -05:00
constexpr auto scientific_formats_prefix = " scientificFormats-numberSystem- " sv ;
2021-11-12 09:09:48 -05:00
if ( key . starts_with ( symbols_prefix ) ) {
auto system = key . substring ( symbols_prefix . length ( ) ) ;
auto & number_system = ensure_number_system ( system ) ;
2021-12-11 00:37:34 -05:00
NumericSymbolList symbols ;
2021-11-12 09:09:48 -05:00
value . as_object ( ) . for_each_member ( [ & ] ( auto const & symbol , JsonValue const & localization ) {
2021-12-11 00:37:34 -05:00
auto numeric_symbol = numeric_symbol_from_string ( symbol ) ;
if ( ! numeric_symbol . has_value ( ) )
return ;
// Grow the list on demand so each symbol can be stored at its enum value's position.
if ( to_underlying ( * numeric_symbol ) > = symbols . size ( ) )
symbols . resize ( to_underlying ( * numeric_symbol ) + 1 ) ;
2021-11-12 09:09:48 -05:00
2021-12-11 00:37:34 -05:00
auto symbol_index = locale_data . unique_strings . ensure ( localization . as_string ( ) ) ;
symbols [ to_underlying ( * numeric_symbol ) ] = symbol_index ;
2021-11-12 09:09:48 -05:00
} ) ;
2021-12-11 00:37:34 -05:00
number_system . symbols = locale_data . unique_symbols . ensure ( move ( symbols ) ) ;
2021-11-12 09:09:48 -05:00
} else if ( key . starts_with ( decimal_formats_prefix ) ) {
auto system = key . substring ( decimal_formats_prefix . length ( ) ) ;
auto & number_system = ensure_number_system ( system ) ;
auto format_object = value . as_object ( ) . get ( " standard " sv ) ;
2021-11-15 07:56:20 -05:00
// Only the standard decimal pattern is used to derive the grouping sizes (it is the only
// call that passes the number system pointer).
parse_number_pattern ( format_object . as_string ( ) . split ( ' ; ' ) , locale_data , NumberFormatType : : Standard , number_system . decimal_format , & number_system ) ;
2021-11-12 09:09:48 -05:00
auto const & long_format = value . as_object ( ) . get ( " long " sv ) . as_object ( ) . get ( " decimalFormat " sv ) ;
number_system . decimal_long_formats = parse_number_format ( long_format . as_object ( ) ) ;
auto const & short_format = value . as_object ( ) . get ( " short " sv ) . as_object ( ) . get ( " decimalFormat " sv ) ;
number_system . decimal_short_formats = parse_number_format ( short_format . as_object ( ) ) ;
} else if ( key . starts_with ( currency_formats_prefix ) ) {
auto system = key . substring ( currency_formats_prefix . length ( ) ) ;
auto & number_system = ensure_number_system ( system ) ;
auto format_object = value . as_object ( ) . get ( " standard " sv ) ;
2021-11-15 07:56:20 -05:00
parse_number_pattern ( format_object . as_string ( ) . split ( ' ; ' ) , locale_data , NumberFormatType : : Standard , number_system . currency_format ) ;
2021-11-12 09:09:48 -05:00
format_object = value . as_object ( ) . get ( " accounting " sv ) ;
2021-11-15 07:56:20 -05:00
parse_number_pattern ( format_object . as_string ( ) . split ( ' ; ' ) , locale_data , NumberFormatType : : Standard , number_system . accounting_format ) ;
2021-11-12 09:09:48 -05:00
2021-11-12 17:16:30 -05:00
// The currency object itself is passed here; parse_number_format only picks up members
// whose keys have three dash-separated parts.
number_system . currency_unit_formats = parse_number_format ( value . as_object ( ) ) ;
2021-11-12 09:09:48 -05:00
// Short currency formats are optional.
if ( value . as_object ( ) . has ( " short " sv ) ) {
auto const & short_format = value . as_object ( ) . get ( " short " sv ) . as_object ( ) . get ( " standard " sv ) ;
number_system . currency_short_formats = parse_number_format ( short_format . as_object ( ) ) ;
}
} else if ( key . starts_with ( percent_formats_prefix ) ) {
2021-11-14 08:09:25 -05:00
auto system = key . substring ( percent_formats_prefix . length ( ) ) ;
2021-11-12 09:09:48 -05:00
auto & number_system = ensure_number_system ( system ) ;
auto format_object = value . as_object ( ) . get ( " standard " sv ) ;
2021-11-15 07:56:20 -05:00
parse_number_pattern ( format_object . as_string ( ) . split ( ' ; ' ) , locale_data , NumberFormatType : : Standard , number_system . percent_format ) ;
2021-11-14 08:35:10 -05:00
} else if ( key . starts_with ( scientific_formats_prefix ) ) {
auto system = key . substring ( scientific_formats_prefix . length ( ) ) ;
auto & number_system = ensure_number_system ( system ) ;
auto format_object = value . as_object ( ) . get ( " standard " sv ) ;
2021-11-15 07:56:20 -05:00
parse_number_pattern ( format_object . as_string ( ) . split ( ' ; ' ) , locale_data , NumberFormatType : : Standard , number_system . scientific_format ) ;
2021-11-12 09:09:48 -05:00
}
} ) ;
2021-11-23 11:00:27 -05:00
2022-01-11 18:50:15 -05:00
// Publish one index per known numbering system, in the same order as
// locale_data.number_systems. Systems this locale has no data for get index 0.
locale . number_systems . ensure_capacity ( number_systems . size ( ) ) ;
2021-12-11 08:12:07 -05:00
for ( auto & number_system : number_systems ) {
2022-01-11 18:50:15 -05:00
NumberSystemIndexType system_index = 0 ;
if ( number_system . has_value ( ) )
system_index = locale_data . unique_systems . ensure ( number_system . release_value ( ) ) ;
locale . number_systems . append ( system_index ) ;
2021-12-11 08:12:07 -05:00
}
2022-01-27 12:40:31 -05:00
// The value is read as a string and converted to u8; value() is called unconditionally on the
// conversion result.
locale . minimum_grouping_digits = minimum_grouping_digits . as_string ( ) . template to_uint < u8 > ( ) . value ( ) ;
2021-11-23 11:00:27 -05:00
return { } ;
2021-11-12 09:09:48 -05:00
}
2021-11-23 11:00:27 -05:00
static ErrorOr < void > parse_units ( String locale_units_path , UnicodeLocaleData & locale_data , Locale & locale )
2021-11-16 09:31:15 -05:00
{
LexicalPath units_path ( move ( locale_units_path ) ) ;
units_path = units_path . append ( " units.json " sv ) ;
2022-02-06 16:00:13 -05:00
auto locale_units = TRY ( read_json_file ( units_path . string ( ) ) ) ;
2021-12-11 08:30:50 -05:00
auto const & main_object = locale_units . as_object ( ) . get ( " main " sv ) ;
2021-11-16 09:31:15 -05:00
auto const & locale_object = main_object . as_object ( ) . get ( units_path . parent ( ) . basename ( ) ) ;
auto const & locale_units_object = locale_object . as_object ( ) . get ( " units " sv ) ;
auto const & long_object = locale_units_object . as_object ( ) . get ( " long " sv ) ;
auto const & short_object = locale_units_object . as_object ( ) . get ( " short " sv ) ;
auto const & narrow_object = locale_units_object . as_object ( ) . get ( " narrow " sv ) ;
2021-12-11 08:30:50 -05:00
HashMap < String , Unit > units ;
2021-11-16 09:31:15 -05:00
auto ensure_unit = [ & ] ( auto const & unit ) - > Unit & {
2021-12-11 08:30:50 -05:00
return units . ensure ( unit , [ & ] ( ) {
2021-11-16 09:31:15 -05:00
auto unit_index = locale_data . unique_strings . ensure ( unit ) ;
return Unit { . unit = unit_index } ;
} ) ;
} ;
auto is_sanctioned_unit = [ ] ( StringView unit_name ) {
// This is a copy of the units sanctioned for use within ECMA-402. LibUnicode generally tries to
// avoid being directly dependent on ECMA-402, but this rather significantly reduces the amount
// of data generated here, and ECMA-402 is currently the only consumer of this data.
// https://tc39.es/ecma402/#table-sanctioned-simple-unit-identifiers
constexpr auto sanctioned_units = AK : : Array { " acre " sv , " bit " sv , " byte " sv , " celsius " sv , " centimeter " sv , " day " sv , " degree " sv , " fahrenheit " sv , " fluid-ounce " sv , " foot " sv , " gallon " sv , " gigabit " sv , " gigabyte " sv , " gram " sv , " hectare " sv , " hour " sv , " inch " sv , " kilobit " sv , " kilobyte " sv , " kilogram " sv , " kilometer " sv , " liter " sv , " megabit " sv , " megabyte " sv , " meter " sv , " mile " sv , " mile-scandinavian " sv , " milliliter " sv , " millimeter " sv , " millisecond " sv , " minute " sv , " month " sv , " ounce " sv , " percent " sv , " petabyte " sv , " pound " sv , " second " sv , " stone " sv , " terabit " sv , " terabyte " sv , " week " sv , " yard " sv , " year " sv } ;
return find ( sanctioned_units . begin ( ) , sanctioned_units . end ( ) , unit_name ) ! = sanctioned_units . end ( ) ;
} ;
auto parse_units_object = [ & ] ( auto const & units_object , Unicode : : Style style ) {
constexpr auto unit_pattern_prefix = " unitPattern-count- " sv ;
constexpr auto combined_unit_separator = " -per- " sv ;
units_object . for_each_member ( [ & ] ( auto const & key , JsonValue const & value ) {
auto end_of_category = key . find ( ' - ' ) ;
if ( ! end_of_category . has_value ( ) )
return ;
auto unit_name = key . substring ( * end_of_category + 1 ) ;
if ( ! is_sanctioned_unit ( unit_name ) ) {
auto indices = unit_name . find_all ( combined_unit_separator ) ;
if ( indices . size ( ) ! = 1 )
return ;
auto numerator = unit_name . substring_view ( 0 , indices [ 0 ] ) ;
auto denominator = unit_name . substring_view ( indices [ 0 ] + combined_unit_separator . length ( ) ) ;
if ( ! is_sanctioned_unit ( numerator ) | | ! is_sanctioned_unit ( denominator ) )
return ;
}
2021-12-10 16:36:39 -05:00
auto & unit = ensure_unit ( unit_name ) ;
NumberFormatList formats ;
2021-11-16 09:31:15 -05:00
value . as_object ( ) . for_each_member ( [ & ] ( auto const & unit_key , JsonValue const & pattern_value ) {
if ( ! unit_key . starts_with ( unit_pattern_prefix ) )
return ;
NumberFormat format { } ;
auto plurality = unit_key . substring_view ( unit_pattern_prefix . length ( ) ) ;
format . plurality = NumberFormat : : plurality_from_string ( plurality ) ;
auto zero_format = pattern_value . as_string ( ) . replace ( " {0} " sv , " {number} " sv ) ;
zero_format = parse_identifiers ( zero_format , " unitIdentifier " sv , locale_data , format ) ;
format . positive_format_index = locale_data . unique_strings . ensure ( zero_format . replace ( " {number} " sv , " {plusSign}{number} " sv ) ) ;
format . negative_format_index = locale_data . unique_strings . ensure ( zero_format . replace ( " {number} " sv , " {minusSign}{number} " sv ) ) ;
format . zero_format_index = locale_data . unique_strings . ensure ( move ( zero_format ) ) ;
2021-12-10 16:36:39 -05:00
formats . append ( locale_data . unique_formats . ensure ( move ( format ) ) ) ;
2021-11-16 09:31:15 -05:00
} ) ;
2021-12-10 16:36:39 -05:00
auto number_format_list_index = locale_data . unique_format_lists . ensure ( move ( formats ) ) ;
switch ( style ) {
case Unicode : : Style : : Long :
unit . long_formats = number_format_list_index ;
break ;
case Unicode : : Style : : Short :
unit . short_formats = number_format_list_index ;
break ;
case Unicode : : Style : : Narrow :
unit . narrow_formats = number_format_list_index ;
break ;
default :
VERIFY_NOT_REACHED ( ) ;
}
2021-11-16 09:31:15 -05:00
} ) ;
} ;
parse_units_object ( long_object . as_object ( ) , Unicode : : Style : : Long ) ;
parse_units_object ( short_object . as_object ( ) , Unicode : : Style : : Short ) ;
parse_units_object ( narrow_object . as_object ( ) , Unicode : : Style : : Narrow ) ;
2021-11-23 11:00:27 -05:00
2021-12-11 08:30:50 -05:00
for ( auto & unit : units ) {
auto unit_index = locale_data . unique_units . ensure ( move ( unit . value ) ) ;
locale . units . set ( unit . key , unit_index ) ;
}
2021-11-23 11:00:27 -05:00
return { } ;
2021-11-16 09:31:15 -05:00
}
2022-01-11 18:42:07 -05:00
// Parses the number-system digits found under `core_path` and then every locale directory
// under `numbers_path` and `units_path`, storing the results in `locale_data`. Any error from
// a TRY'd step is returned to the caller as-is; a missing supplemental directory trips VERIFY.
static ErrorOr < void > parse_all_locales ( String core_path , String numbers_path , String units_path , UnicodeLocaleData & locale_data )
{
    // Both directory iterators are created first, ahead of any parsing work.
    auto numbers_directory = TRY ( path_to_dir_iterator ( move ( numbers_path ) ) ) ;
    auto units_directory = TRY ( path_to_dir_iterator ( move ( units_path ) ) ) ;

    auto supplemental_directory = LexicalPath ( move ( core_path ) ) . append ( " supplemental " sv ) ;
    VERIFY ( Core : : File : : is_directory ( supplemental_directory . string ( ) ) ) ;
    TRY ( parse_number_system_digits ( supplemental_directory . string ( ) , locale_data ) ) ;

    // Builds a locale name from the basename of `path`: the language, followed by the script
    // and the region whenever those are non-empty. Anything else in the parsed ID is dropped.
    auto locale_name_without_variants = [ & ] ( String path ) - > ErrorOr < String > {
        auto language_id = TRY ( CanonicalLanguageID < StringIndexType > : : parse ( locale_data . unique_strings , LexicalPath : : basename ( path ) ) ) ;

        StringBuilder name ;
        name . append ( locale_data . unique_strings . get ( language_id . language ) ) ;

        auto append_subtag = [ & ] ( auto subtag_index ) {
            auto subtag = locale_data . unique_strings . get ( subtag_index ) ;
            if ( subtag . is_empty ( ) )
                return ;
            name . appendff ( " -{} " , subtag ) ;
        } ;
        append_subtag ( language_id . script ) ;
        append_subtag ( language_id . region ) ;

        return name . build ( ) ;
    } ;

    // Visits every entry of `directory`, looks up (or creates) the locale it belongs to and
    // hands the entry's path plus that locale to `parse_locale`.
    auto parse_each_locale = [ & ] ( auto & directory , auto parse_locale ) - > ErrorOr < void > {
        while ( directory . has_next ( ) ) {
            auto locale_path = TRY ( next_path_from_dir_iterator ( directory ) ) ;
            auto locale_name = TRY ( locale_name_without_variants ( locale_path ) ) ;

            auto & locale = locale_data . locales . ensure ( locale_name ) ;
            TRY ( parse_locale ( locale_path , locale ) ) ;
        }
        return { } ;
    } ;

    // All number data is parsed before any unit data.
    TRY ( parse_each_locale ( numbers_directory , [ & ] ( auto const & path , auto & locale ) { return parse_number_systems ( path , locale_data , locale ) ; } ) ) ;
    TRY ( parse_each_locale ( units_directory , [ & ] ( auto const & path , auto & locale ) { return parse_units ( path , locale_data , locale ) ; } ) ) ;

    return { } ;
}
2022-01-11 18:42:07 -05:00
// Returns `identifier` converted with to_titlecase(). The leading StringView parameter is
// unnamed and ignored; this function is what gets handed to generate_enum as the formatter.
static String format_identifier ( StringView , String identifier )
{
    auto titlecased = identifier . to_titlecase ( ) ;
    return titlecased ;
}
2022-02-06 16:00:13 -05:00
// Emits the generated header: a fixed preamble that opens `namespace Unicode`, the
// NumberSystem enum built from `locale_data.number_systems`, and the closing brace.
// The text is accumulated in `builder` and written to `file` with a single write; a
// failed write is propagated to the caller through TRY.
static ErrorOr < void > generate_unicode_locale_header ( Core : : Stream : : BufferedFile & file , UnicodeLocaleData & locale_data )
2021-11-12 09:09:48 -05:00
{
StringBuilder builder ;
SourceGenerator generator { builder } ;
// Preamble of the generated file (raw string template, emitted as written).
generator . append ( R " ~~~(
2022-01-11 18:42:07 -05:00
# include <AK/Types.h>
2021-11-12 09:09:48 -05:00
# pragma once
2022-01-11 18:42:07 -05:00
namespace Unicode {
) ~ ~ ~ " );
// The enum's members come from the parsed number system names, run through format_identifier.
generate_enum ( generator , format_identifier , " NumberSystem " sv , { } , locale_data . number_systems ) ;
generator . append ( R " ~~~(
}
2021-11-12 09:09:48 -05:00
) ~ ~ ~ " );
2022-02-06 16:00:13 -05:00
// Flush the accumulated text to the output file.
TRY ( file . write ( generator . as_string_view ( ) . bytes ( ) ) ) ;
return { } ;
2021-11-12 09:09:48 -05:00
}
2022-02-06 16:00:13 -05:00
// Emits the generated implementation file: the struct definitions, the deduplicated
// format / format-list / symbol / number-system / unit tables, the per-locale lookup arrays,
// and the lookup functions that read them. Everything is accumulated in `builder` and
// written to `file` with a single write at the end; a failed write is propagated via TRY.
static ErrorOr < void > generate_unicode_locale_implementation ( Core : : Stream : : BufferedFile & file , UnicodeLocaleData & locale_data )
2021-11-12 09:09:48 -05:00
{
StringBuilder builder ;
SourceGenerator generator { builder } ;
// Values for the @...@ placeholders that appear in the raw string templates below.
generator . set ( " string_index_type " sv , s_string_index_type ) ;
2021-12-02 19:59:09 -05:00
generator . set ( " number_format_index_type " sv , s_number_format_index_type ) ;
2021-12-10 16:03:17 -05:00
generator . set ( " number_format_list_index_type " sv , s_number_format_list_index_type ) ;
2021-12-11 00:37:34 -05:00
generator . set ( " numeric_symbol_list_index_type " sv , s_numeric_symbol_list_index_type ) ;
2021-11-16 13:53:45 -05:00
generator . set ( " identifier_count " , String : : number ( locale_data . max_identifier_count ) ) ;
2021-11-12 09:09:48 -05:00
generator . append ( R " ~~~(
# include <AK/Array.h>
# include <AK/BinarySearch.h>
2021-12-16 12:24:01 -05:00
# include <AK/Optional.h>
2021-11-12 09:09:48 -05:00
# include <AK/Span.h>
2021-12-16 12:24:01 -05:00
# include <AK/StringView.h>
# include <AK/Vector.h>
2021-11-12 09:09:48 -05:00
# include <LibUnicode/Locale.h>
2021-11-27 10:53:42 -05:00
# include <LibUnicode/NumberFormat.h>
2022-02-15 14:31:07 -05:00
# include <LibUnicode/UnicodeLocale.h>
2021-11-12 09:09:48 -05:00
# include <LibUnicode/UnicodeNumberFormat.h>
2022-01-04 10:57:00 -05:00
namespace Unicode {
2021-11-12 09:09:48 -05:00
) ~ ~ ~ " );
// Emit the unique string storage (presumably the s_string_list the generated code indexes).
locale_data . unique_strings . generate ( generator ) ;
generator . append ( R " ~~~(
2022-01-04 10:57:00 -05:00
struct NumberFormatImpl {
NumberFormat to_unicode_number_format ( ) const {
NumberFormat number_format { } ;
2021-11-12 09:09:48 -05:00
number_format . magnitude = magnitude ;
2021-11-15 07:56:20 -05:00
number_format . exponent = exponent ;
2022-01-04 10:57:00 -05:00
number_format . plurality = static_cast < NumberFormat : : Plurality > ( plurality ) ;
2021-11-12 09:09:48 -05:00
number_format . zero_format = s_string_list [ zero_format ] ;
number_format . positive_format = s_string_list [ positive_format ] ;
number_format . negative_format = s_string_list [ negative_format ] ;
2021-11-16 13:53:45 -05:00
number_format . identifiers . ensure_capacity ( identifiers . size ( ) ) ;
for ( @ string_index_type @ identifier : identifiers )
number_format . identifiers . append ( s_string_list [ identifier ] ) ;
2021-11-12 09:09:48 -05:00
return number_format ;
}
u8 magnitude { 0 } ;
2021-11-15 07:56:20 -05:00
u8 exponent { 0 } ;
2021-11-12 09:09:48 -05:00
u8 plurality { 0 } ;
@ string_index_type @ zero_format { 0 } ;
@ string_index_type @ positive_format { 0 } ;
@ string_index_type @ negative_format { 0 } ;
2021-11-16 13:53:45 -05:00
Array < @ string_index_type @ , @ identifier_count @ > identifiers { } ;
2021-11-12 09:09:48 -05:00
} ;
2022-01-11 18:42:07 -05:00
struct NumberSystemData {
2021-12-11 00:37:34 -05:00
@ numeric_symbol_list_index_type @ symbols { 0 } ;
2021-11-12 09:09:48 -05:00
2021-11-13 22:03:22 -05:00
u8 primary_grouping_size { 0 } ;
u8 secondary_grouping_size { 0 } ;
2021-12-02 19:59:09 -05:00
@ number_format_index_type @ decimal_format { 0 } ;
2021-12-10 16:03:17 -05:00
@ number_format_list_index_type @ decimal_long_formats { 0 } ;
@ number_format_list_index_type @ decimal_short_formats { 0 } ;
2021-11-12 09:09:48 -05:00
2021-12-02 19:59:09 -05:00
@ number_format_index_type @ currency_format { 0 } ;
@ number_format_index_type @ accounting_format { 0 } ;
2021-12-10 16:03:17 -05:00
@ number_format_list_index_type @ currency_unit_formats { 0 } ;
@ number_format_list_index_type @ currency_short_formats { 0 } ;
2021-11-12 09:09:48 -05:00
2021-12-02 19:59:09 -05:00
@ number_format_index_type @ percent_format { 0 } ;
@ number_format_index_type @ scientific_format { 0 } ;
2021-11-12 09:09:48 -05:00
} ;
2021-11-16 09:31:15 -05:00
struct Unit {
@ string_index_type @ unit { 0 } ;
2021-12-10 16:36:39 -05:00
@ number_format_list_index_type @ long_formats { 0 } ;
@ number_format_list_index_type @ short_formats { 0 } ;
@ number_format_list_index_type @ narrow_formats { 0 } ;
2021-11-16 09:31:15 -05:00
} ;
2021-11-12 09:09:48 -05:00
) ~ ~ ~ " );
2022-01-04 10:57:00 -05:00
// Emit each deduplicated table under the name the generated lookup functions refer to.
locale_data . unique_formats . generate ( generator , " NumberFormatImpl " sv , " s_number_formats " sv , 10 ) ;
2021-12-10 16:03:17 -05:00
locale_data . unique_format_lists . generate ( generator , s_number_format_index_type , " s_number_format_lists " sv ) ;
2021-12-11 00:37:34 -05:00
locale_data . unique_symbols . generate ( generator , s_string_index_type , " s_numeric_symbol_lists " sv ) ;
2022-01-11 18:42:07 -05:00
locale_data . unique_systems . generate ( generator , " NumberSystemData " sv , " s_number_systems " sv , 10 ) ;
2021-12-11 08:30:50 -05:00
locale_data . unique_units . generate ( generator , " Unit " sv , " s_units " sv , 10 ) ;
2021-11-12 09:09:48 -05:00
2022-01-27 12:40:31 -05:00
// One minimum-grouping-digits entry per locale, emitted in sorted locale-name order.
auto locales = locale_data . locales . keys ( ) ;
quick_sort ( locales ) ;
generator . set ( " size " , String : : number ( locales . size ( ) ) ) ;
generator . append ( R " ~~~(
static constexpr Array < u8 , @ size @ > s_minimum_grouping_digits { { ) ~ ~ ~ " );
bool first = true ;
for ( auto const & locale : locales ) {
generator . append ( first ? " " : " , " ) ;
generator . append ( String : : number ( locale_data . locales . find ( locale ) - > value . minimum_grouping_digits ) ) ;
first = false ;
}
generator . append ( " } }; \n " ) ;
2021-12-11 08:12:07 -05:00
// Emits one constexpr Array called `name` with element type `type`, holding the numeric
// entries of `map`. The requires-expression lets the same lambda serve containers whose
// elements expose `.value` as well as containers of plain numbers.
auto append_map = [ & ] ( String name , auto type , auto const & map ) {
2021-11-12 09:09:48 -05:00
generator . set ( " name " , name ) ;
2021-12-11 08:12:07 -05:00
generator . set ( " type " , type ) ;
generator . set ( " size " , String : : number ( map . size ( ) ) ) ;
2021-11-12 09:09:48 -05:00
generator . append ( R " ~~~(
2021-12-11 08:12:07 -05:00
static constexpr Array < @ type @ , @ size @ > @ name @ { { ) ~ ~ ~ " );
bool first = true ;
for ( auto const & item : map ) {
generator . append ( first ? " " : " , " ) ;
2022-01-11 18:42:07 -05:00
if constexpr ( requires { item . value ; } )
generator . append ( String : : number ( item . value ) ) ;
else
generator . append ( String : : number ( item ) ) ;
2021-12-11 08:12:07 -05:00
first = false ;
2021-11-12 09:09:48 -05:00
}
2021-12-11 08:12:07 -05:00
generator . append ( " } }; " ) ;
2021-11-12 09:09:48 -05:00
} ;
2022-01-11 18:42:07 -05:00
// Digit tables per number system, then the per-locale number-system and unit index tables.
generate_mapping ( generator , locale_data . number_system_digits , " u32 " sv , " s_number_systems_digits " sv , " s_number_systems_digits_{} " , nullptr , [ & ] ( auto const & name , auto const & value ) { append_map ( name , " u32 " sv , value ) ; } ) ;
2022-01-10 16:34:55 -05:00
generate_mapping ( generator , locale_data . locales , s_number_system_index_type , " s_locale_number_systems " sv , " s_number_systems_{} " , nullptr , [ & ] ( auto const & name , auto const & value ) { append_map ( name , s_number_system_index_type , value . number_systems ) ; } ) ;
generate_mapping ( generator , locale_data . locales , s_unit_index_type , " s_locale_units " sv , " s_units_{} " , nullptr , [ & ] ( auto const & name , auto const & value ) { append_map ( name , s_unit_index_type , value . units ) ; } ) ;
2021-11-12 09:09:48 -05:00
2022-02-15 14:31:07 -05:00
generator . append ( R " ~~~(
static Optional < NumberSystem > keyword_to_number_system ( KeywordNumbers keyword )
{
switch ( keyword ) { ) ~ ~ ~ " );
2022-01-11 18:42:07 -05:00
2022-02-15 14:31:07 -05:00
// One switch case per parsed number system, pairing the KeywordNumbers and NumberSystem
// enumerators that share the same formatted name.
for ( auto const & number_system : locale_data . number_systems ) {
generator . set ( " name " sv , format_identifier ( { } , number_system ) ) ;
generator . append ( R " ~~~(
case KeywordNumbers : : @ name @ :
return NumberSystem : : @ name @ ; ) ~ ~ ~ " );
}
2022-01-11 18:42:07 -05:00
2021-11-12 09:09:48 -05:00
generator . append ( R " ~~~(
2022-02-15 14:31:07 -05:00
default :
return { } ;
}
}
2022-01-11 18:42:07 -05:00
Optional < Span < u32 const > > get_digits_for_number_system ( StringView system )
{
2022-02-15 14:31:07 -05:00
auto number_system_keyword = keyword_nu_from_string ( system ) ;
if ( ! number_system_keyword . has_value ( ) )
return { } ;
auto number_system_value = keyword_to_number_system ( * number_system_keyword ) ;
2022-01-11 18:42:07 -05:00
if ( ! number_system_value . has_value ( ) )
return { } ;
auto number_system_index = to_underlying ( * number_system_value ) ;
return s_number_systems_digits [ number_system_index ] ;
}
static NumberSystemData const * find_number_system ( StringView locale , StringView system )
2021-11-12 09:09:48 -05:00
{
auto locale_value = locale_from_string ( locale ) ;
if ( ! locale_value . has_value ( ) )
return nullptr ;
2022-02-15 14:31:07 -05:00
auto number_system_keyword = keyword_nu_from_string ( system ) ;
if ( ! number_system_keyword . has_value ( ) )
return { } ;
auto number_system_value = keyword_to_number_system ( * number_system_keyword ) ;
2022-01-11 18:50:15 -05:00
if ( ! number_system_value . has_value ( ) )
2022-02-15 14:31:07 -05:00
return { } ;
2022-01-11 18:50:15 -05:00
2021-11-12 09:09:48 -05:00
auto locale_index = to_underlying ( * locale_value ) - 1 ; // Subtract 1 because 0 == Locale::None.
2022-01-11 18:50:15 -05:00
auto number_system_index = to_underlying ( * number_system_value ) ;
2021-12-11 08:12:07 -05:00
2022-01-11 18:50:15 -05:00
auto const & number_systems = s_locale_number_systems . at ( locale_index ) ;
number_system_index = number_systems . at ( number_system_index ) ;
2021-11-12 09:09:48 -05:00
2022-01-11 18:50:15 -05:00
if ( number_system_index = = 0 )
return nullptr ;
2021-11-12 09:09:48 -05:00
2022-01-11 18:50:15 -05:00
return & s_number_systems . at ( number_system_index ) ;
2021-11-12 09:09:48 -05:00
}
2022-01-04 10:57:00 -05:00
Optional < StringView > get_number_system_symbol ( StringView locale , StringView system , NumericSymbol symbol )
2021-11-12 09:09:48 -05:00
{
if ( auto const * number_system = find_number_system ( locale , system ) ; number_system ! = nullptr ) {
2021-12-11 00:37:34 -05:00
auto symbols = s_numeric_symbol_lists . at ( number_system - > symbols ) ;
auto symbol_index = to_underlying ( symbol ) ;
if ( symbol_index > = symbols . size ( ) )
return { } ;
return s_string_list [ symbols [ symbol_index ] ] ;
2021-11-12 09:09:48 -05:00
}
return { } ;
}
2021-11-13 22:03:22 -05:00
Optional < NumberGroupings > get_number_system_groupings ( StringView locale , StringView system )
{
2022-01-27 12:40:31 -05:00
auto locale_value = locale_from_string ( locale ) ;
if ( ! locale_value . has_value ( ) )
return { } ;
u8 minimum_grouping_digits = s_minimum_grouping_digits [ to_underlying ( * locale_value ) - 1 ] ;
2021-11-13 22:03:22 -05:00
if ( auto const * number_system = find_number_system ( locale , system ) ; number_system ! = nullptr )
2022-01-27 12:40:31 -05:00
return NumberGroupings { minimum_grouping_digits , number_system - > primary_grouping_size , number_system - > secondary_grouping_size } ;
2021-11-13 22:03:22 -05:00
return { } ;
}
2022-01-04 10:57:00 -05:00
Optional < NumberFormat > get_standard_number_system_format ( StringView locale , StringView system , StandardNumberFormatType type )
2021-11-12 09:09:48 -05:00
{
if ( auto const * number_system = find_number_system ( locale , system ) ; number_system ! = nullptr ) {
2021-12-02 19:59:09 -05:00
@ number_format_index_type @ format_index = 0 ;
2021-11-12 09:09:48 -05:00
switch ( type ) {
case StandardNumberFormatType : : Decimal :
2021-12-02 19:59:09 -05:00
format_index = number_system - > decimal_format ;
break ;
2021-11-12 09:09:48 -05:00
case StandardNumberFormatType : : Currency :
2021-12-02 19:59:09 -05:00
format_index = number_system - > currency_format ;
break ;
2021-11-12 09:09:48 -05:00
case StandardNumberFormatType : : Accounting :
2021-12-02 19:59:09 -05:00
format_index = number_system - > accounting_format ;
break ;
2021-11-12 09:09:48 -05:00
case StandardNumberFormatType : : Percent :
2021-12-02 19:59:09 -05:00
format_index = number_system - > percent_format ;
break ;
2021-11-14 08:35:10 -05:00
case StandardNumberFormatType : : Scientific :
2021-12-02 19:59:09 -05:00
format_index = number_system - > scientific_format ;
break ;
2021-11-12 09:09:48 -05:00
}
2021-12-02 19:59:09 -05:00
return s_number_formats [ format_index ] . to_unicode_number_format ( ) ;
2021-11-12 09:09:48 -05:00
}
return { } ;
}
2022-01-04 10:57:00 -05:00
Vector < NumberFormat > get_compact_number_system_formats ( StringView locale , StringView system , CompactNumberFormatType type )
2021-11-12 09:09:48 -05:00
{
2022-01-04 10:57:00 -05:00
Vector < NumberFormat > formats ;
2021-11-12 09:09:48 -05:00
if ( auto const * number_system = find_number_system ( locale , system ) ; number_system ! = nullptr ) {
2021-12-10 16:03:17 -05:00
@ number_format_list_index_type @ number_format_list_index { 0 } ;
2021-11-12 09:09:48 -05:00
switch ( type ) {
case CompactNumberFormatType : : DecimalLong :
2021-12-10 16:03:17 -05:00
number_format_list_index = number_system - > decimal_long_formats ;
2021-11-12 09:09:48 -05:00
break ;
case CompactNumberFormatType : : DecimalShort :
2021-12-10 16:03:17 -05:00
number_format_list_index = number_system - > decimal_short_formats ;
2021-11-12 09:09:48 -05:00
break ;
2021-11-12 17:16:30 -05:00
case CompactNumberFormatType : : CurrencyUnit :
2021-12-10 16:03:17 -05:00
number_format_list_index = number_system - > currency_unit_formats ;
2021-11-12 17:16:30 -05:00
break ;
2021-11-12 09:09:48 -05:00
case CompactNumberFormatType : : CurrencyShort :
2021-12-10 16:03:17 -05:00
number_format_list_index = number_system - > currency_short_formats ;
2021-11-12 09:09:48 -05:00
break ;
}
2021-12-10 16:03:17 -05:00
auto number_formats = s_number_format_lists . at ( number_format_list_index ) ;
2021-11-12 09:09:48 -05:00
formats . ensure_capacity ( number_formats . size ( ) ) ;
2021-12-02 19:59:09 -05:00
for ( auto number_format : number_formats )
formats . append ( s_number_formats [ number_format ] . to_unicode_number_format ( ) ) ;
2021-11-12 09:09:48 -05:00
}
return formats ;
}
2021-11-16 09:31:15 -05:00
static Unit const * find_units ( StringView locale , StringView unit )
{
auto locale_value = locale_from_string ( locale ) ;
if ( ! locale_value . has_value ( ) )
return nullptr ;
auto locale_index = to_underlying ( * locale_value ) - 1 ; // Subtract 1 because 0 == Locale::None.
2021-12-11 08:30:50 -05:00
auto const & locale_units = s_locale_units . at ( locale_index ) ;
for ( auto unit_index : locale_units ) {
auto const & units = s_units . at ( unit_index ) ;
2021-11-16 09:31:15 -05:00
if ( unit = = s_string_list [ units . unit ] )
return & units ;
} ;
return nullptr ;
}
2022-01-04 10:57:00 -05:00
Vector < NumberFormat > get_unit_formats ( StringView locale , StringView unit , Style style )
2021-11-16 09:31:15 -05:00
{
2022-01-04 10:57:00 -05:00
Vector < NumberFormat > formats ;
2021-11-16 09:31:15 -05:00
if ( auto const * units = find_units ( locale , unit ) ; units ! = nullptr ) {
2021-12-10 16:36:39 -05:00
@ number_format_list_index_type @ number_format_list_index { 0 } ;
2021-11-16 09:31:15 -05:00
switch ( style ) {
case Style : : Long :
2021-12-10 16:36:39 -05:00
number_format_list_index = units - > long_formats ;
2021-11-16 09:31:15 -05:00
break ;
case Style : : Short :
2021-12-10 16:36:39 -05:00
number_format_list_index = units - > short_formats ;
2021-11-16 09:31:15 -05:00
break ;
case Style : : Narrow :
2021-12-10 16:36:39 -05:00
number_format_list_index = units - > narrow_formats ;
2021-11-16 09:31:15 -05:00
break ;
default :
VERIFY_NOT_REACHED ( ) ;
}
2021-12-10 16:36:39 -05:00
auto number_formats = s_number_format_lists . at ( number_format_list_index ) ;
2021-11-16 09:31:15 -05:00
formats . ensure_capacity ( number_formats . size ( ) ) ;
2021-12-02 19:59:09 -05:00
for ( auto number_format : number_formats )
formats . append ( s_number_formats [ number_format ] . to_unicode_number_format ( ) ) ;
2021-11-16 09:31:15 -05:00
}
return formats ;
}
2021-11-12 09:09:48 -05:00
}
) ~ ~ ~ " );
2022-02-06 16:00:13 -05:00
// Flush the accumulated text to the output file.
TRY ( file . write ( generator . as_string_view ( ) . bytes ( ) ) ) ;
return { } ;
2021-11-12 09:09:48 -05:00
}
2021-11-23 11:00:27 -05:00
// Generator entry point: reads the output and CLDR input paths from the command line, opens
// both output files for writing, parses the locale data and then emits the header followed by
// the implementation. Returns 0 on success; any failing step is propagated through TRY.
ErrorOr < int > serenity_main ( Main : : Arguments arguments )
{
    // Where the generated sources go.
    StringView header_output_path ;
    StringView implementation_output_path ;

    // Where the CLDR input data lives.
    StringView cldr_core_path ;
    StringView cldr_numbers_path ;
    StringView cldr_units_path ;

    Core : : ArgsParser parser ;
    parser . add_option ( header_output_path , " Path to the Unicode locale header file to generate " , " generated-header-path " , ' h ' , " generated-header-path " ) ;
    parser . add_option ( implementation_output_path , " Path to the Unicode locale implementation file to generate " , " generated-implementation-path " , ' c ' , " generated-implementation-path " ) ;
    parser . add_option ( cldr_core_path , " Path to cldr-core directory " , " core-path " , ' r ' , " core-path " ) ;
    parser . add_option ( cldr_numbers_path , " Path to cldr-numbers directory " , " numbers-path " , ' n ' , " numbers-path " ) ;
    parser . add_option ( cldr_units_path , " Path to cldr-units directory " , " units-path " , ' u ' , " units-path " ) ;
    parser . parse ( arguments ) ;

    // Both outputs are opened before any input is parsed.
    auto const write_mode = Core : : Stream : : OpenMode : : Write ;
    auto header_file = TRY ( open_file ( header_output_path , write_mode ) ) ;
    auto implementation_file = TRY ( open_file ( implementation_output_path , write_mode ) ) ;

    UnicodeLocaleData parsed_data ;
    TRY ( parse_all_locales ( cldr_core_path , cldr_numbers_path , cldr_units_path , parsed_data ) ) ;

    TRY ( generate_unicode_locale_header ( * header_file , parsed_data ) ) ;
    TRY ( generate_unicode_locale_implementation ( * implementation_file , parsed_data ) ) ;

    return 0 ;
}