2021-08-24 19:50:18 -04:00
/*
* Copyright ( c ) 2021 , Tim Flynn < trflynn89 @ pm . me >
*
* SPDX - License - Identifier : BSD - 2 - Clause
*/
2021-10-12 12:22:47 -04:00
# include "GeneratorUtil.h"
2021-08-24 19:50:18 -04:00
# include <AK/AllOf.h>
# include <AK/CharacterTypes.h>
# include <AK/Format.h>
# include <AK/HashMap.h>
# include <AK/JsonObject.h>
# include <AK/JsonParser.h>
# include <AK/JsonValue.h>
# include <AK/LexicalPath.h>
# include <AK/QuickSort.h>
# include <AK/SourceGenerator.h>
# include <AK/String.h>
# include <AK/StringBuilder.h>
# include <LibCore/ArgsParser.h>
# include <LibCore/DirIterator.h>
# include <LibCore/File.h>
2021-10-14 18:31:09 -04:00
// Integer type used throughout this generator for indices handed out by the unique string storage.
using StringIndexType = u16 ;
// Textual spelling of the index type above.
// NOTE(review): presumably written into the generated sources; the use site is not visible in this part of the file.
constexpr auto s_string_index_type = " u16 " sv ;
2021-09-06 13:56:44 -04:00
// One set of list-formatting patterns for a locale, identified by its type and style.
// start/middle/end/pair hold the unique-string indices of the CLDR "start", "middle", "end" and "2"
// pattern strings (see parse_locale_list_patterns).
struct ListPatterns {
String type ;
String style ;
2021-10-14 18:31:09 -04:00
StringIndexType start { 0 } ;
StringIndexType middle { 0 } ;
StringIndexType end { 0 } ;
StringIndexType pair { 0 } ;
2021-09-06 13:56:44 -04:00
} ;
2021-08-24 19:50:18 -04:00
// Everything collected for a single locale.
//
// language/territory/variant are the identity subtags filled in by parse_identity. Each HashMap maps
// a CLDR key (language code, territory code, script code, currency code, keyword name) to the index
// of its localized value in UnicodeLocaleData::unique_strings. Currency display names are kept in
// four separate maps, one per style (long, short, narrow, numeric).
struct Locale {
String language ;
Optional < String > territory ;
Optional < String > variant ;
2021-10-14 18:31:09 -04:00
HashMap < String , StringIndexType > languages ;
HashMap < String , StringIndexType > territories ;
HashMap < String , StringIndexType > scripts ;
LibJS+LibUnicode: Generate all styles of currency localizations
Currently, LibUnicode is only parsing and generating the "long" style of
currency display names. However, the CLDR contains "short" and "narrow"
forms as well that need to be handled. Parse these, and update LibJS to
actually respect the "style" option provided by the user for displaying
currencies with Intl.DisplayNames.
Note: There are some discrepencies between the engines on how style is
handled. In particular, running:
new Intl.DisplayNames('en', {type:'currency', style:'narrow'}).of('usd')
Gives:
SpiderMoney: "USD"
V8: "US Dollar"
LibJS: "$"
And running:
new Intl.DisplayNames('en', {type:'currency', style:'short'}).of('usd')
Gives:
SpiderMonkey: "$"
V8: "US Dollar"
LibJS: "$"
My best guess is V8 isn't handling style, and just returning the long
form (which is what LibJS did before this commit). And SpiderMoney can
handle some styles, but if they don't have a value for the requested
style, they fall back to the canonicalized code passed into of().
2021-11-12 13:11:30 -05:00
HashMap < String , StringIndexType > long_currencies ;
HashMap < String , StringIndexType > short_currencies ;
HashMap < String , StringIndexType > narrow_currencies ;
HashMap < String , StringIndexType > numeric_currencies ;
2021-10-14 18:31:09 -04:00
HashMap < String , StringIndexType > keywords ;
2021-09-06 13:56:44 -04:00
Vector < ListPatterns > list_patterns ;
2021-08-24 19:50:18 -04:00
} ;
2021-09-02 17:46:35 -04:00
// A pair of parsed language IDs: `key` is the ID being mapped from and `alias` the ID it maps to.
// Used both for complex language aliases and for likely-subtag entries.
struct LanguageMapping {
2021-11-12 08:48:21 -05:00
CanonicalLanguageID < StringIndexType > key { } ;
CanonicalLanguageID < StringIndexType > alias { } ;
2021-09-02 17:46:35 -04:00
} ;
2021-08-24 19:50:18 -04:00
// Aggregate of all data gathered from the CLDR by the parse_* functions in this file.
//
// - unique_strings: storage that interns strings and hands back StringIndexType indices.
// - locales: per-locale data, keyed by locale name.
// - locale_aliases: aliases derived from default-content locales and from script-less locale names.
// - languages .. list_pattern_styles: de-duplicated lists of every key seen in each category.
// - *_aliases: simple alias maps from aliases.json (key -> unique-string index of the replacement).
// - complex_mappings / likely_subtags: mappings whose sides are full language IDs.
// - max_variant_size: largest number of variants seen on either side of any parsed mapping.
struct UnicodeLocaleData {
2021-11-12 08:48:21 -05:00
UniqueStringStorage < StringIndexType > unique_strings ;
2021-11-17 08:20:19 -05:00
2021-08-24 19:50:18 -04:00
HashMap < String , Locale > locales ;
2021-11-17 08:20:19 -05:00
Vector < Alias > locale_aliases ;
2021-08-24 19:50:18 -04:00
Vector < String > languages ;
Vector < String > territories ;
2021-08-26 08:29:39 -04:00
Vector < String > scripts ;
2021-08-24 19:50:18 -04:00
Vector < String > variants ;
2021-08-26 08:38:54 -04:00
Vector < String > currencies ;
2021-09-10 09:56:11 -04:00
Vector < String > keywords ;
2021-09-06 13:56:44 -04:00
Vector < String > list_pattern_types ;
Vector < String > list_pattern_styles ;
2021-10-14 18:31:09 -04:00
HashMap < String , StringIndexType > language_aliases ;
HashMap < String , StringIndexType > territory_aliases ;
HashMap < String , StringIndexType > script_aliases ;
HashMap < String , StringIndexType > variant_aliases ;
HashMap < String , StringIndexType > subdivision_aliases ;
2021-09-02 17:46:35 -04:00
Vector < LanguageMapping > complex_mappings ;
Vector < LanguageMapping > likely_subtags ;
size_t max_variant_size { 0 } ;
2021-08-24 19:50:18 -04:00
} ;
2021-11-23 11:54:53 -05:00
// Some parsing is expected to fail. For example, the CLDR contains language mappings
// with locales such as "en-GB-oed" that are canonically invalid locale IDs.
//
// TRY_OR_DISCARD evaluates `expression` once. If the result reports an error, the enclosing
// function or lambda returns immediately with no value (so it must return void) and the error is
// dropped; otherwise the macro yields the released value. It is written as a `({ ... })`
// statement-expression so it can be used on the right-hand side of an initialization.
# define TRY_OR_DISCARD(expression) \
( { \
auto _temporary_result = ( expression ) ; \
if ( _temporary_result . is_error ( ) ) \
return ; \
_temporary_result . release_value ( ) ; \
} )
// Parses `key` and `alias` as language IDs (interning their subtags in locale_data.unique_strings)
// and bundles the two results into a LanguageMapping.
//
// The error from whichever parse fails first is propagated to the caller; `key` is parsed first.
static ErrorOr < LanguageMapping > parse_language_mapping ( UnicodeLocaleData & locale_data , StringView key , StringView alias )
2021-09-02 17:46:35 -04:00
{
2021-11-23 11:54:53 -05:00
auto parsed_key = TRY ( CanonicalLanguageID < StringIndexType > : : parse ( locale_data . unique_strings , key ) ) ;
auto parsed_alias = TRY ( CanonicalLanguageID < StringIndexType > : : parse ( locale_data . unique_strings , alias ) ) ;
return LanguageMapping { move ( parsed_key ) , move ( parsed_alias ) } ;
2021-09-02 17:46:35 -04:00
}
2021-11-23 10:51:10 -05:00
// Reads aliases.json from the given supplemental directory and records its alias tables in
// locale_data.
//
// For each of the five alias categories (language, territory, script, variant, subdivision), every
// member's "_replacement" string is taken as the alias. Keys containing a dash are parsed as full
// language IDs and stored in complex_mappings; all other keys go into the category's alias map with
// the replacement interned in unique_strings.
//
// Errors from opening the file or parsing its JSON are propagated via TRY.
static ErrorOr < void > parse_core_aliases ( String core_supplemental_path , UnicodeLocaleData & locale_data )
2021-08-30 14:56:23 -04:00
{
LexicalPath core_aliases_path ( move ( core_supplemental_path ) ) ;
core_aliases_path = core_aliases_path . append ( " aliases.json " sv ) ;
2021-11-23 10:51:10 -05:00
auto core_aliases_file = TRY ( Core : : File : : open ( core_aliases_path . string ( ) , Core : : OpenMode : : ReadOnly ) ) ;
auto core_aliases = TRY ( JsonValue : : from_string ( core_aliases_file - > read_all ( ) ) ) ;
2021-08-30 14:56:23 -04:00
2021-11-15 01:46:51 +01:00
auto const & supplemental_object = core_aliases . as_object ( ) . get ( " supplemental " sv ) ;
2021-08-30 14:56:23 -04:00
auto const & metadata_object = supplemental_object . as_object ( ) . get ( " metadata " sv ) ;
auto const & alias_object = metadata_object . as_object ( ) . get ( " alias " sv ) ;
2021-09-02 17:46:35 -04:00
// Shared handler for one alias category; `alias_map` receives the simple (dash-free) entries.
auto append_aliases = [ & ] ( auto & alias_object , auto & alias_map ) {
2021-08-30 14:56:23 -04:00
alias_object . as_object ( ) . for_each_member ( [ & ] ( auto const & key , JsonValue const & value ) {
auto alias = value . as_object ( ) . get ( " _replacement " sv ) . as_string ( ) ;
2021-08-31 10:03:49 -04:00
2021-09-02 17:46:35 -04:00
if ( key . contains ( ' - ' ) ) {
2021-11-23 11:54:53 -05:00
// A mapping that fails to parse makes this callback return early, skipping only this entry.
auto mapping = TRY_OR_DISCARD ( parse_language_mapping ( locale_data , key , alias ) ) ;
// Track the largest variant count seen on either side of any mapping.
locale_data . max_variant_size = max ( mapping . key . variants . size ( ) , locale_data . max_variant_size ) ;
locale_data . max_variant_size = max ( mapping . alias . variants . size ( ) , locale_data . max_variant_size ) ;
locale_data . complex_mappings . append ( move ( mapping ) ) ;
2021-09-02 17:46:35 -04:00
} else {
2021-11-12 08:48:21 -05:00
alias_map . set ( key , locale_data . unique_strings . ensure ( alias ) ) ;
2021-09-02 17:46:35 -04:00
}
2021-08-30 14:56:23 -04:00
} ) ;
} ;
append_aliases ( alias_object . as_object ( ) . get ( " languageAlias " sv ) , locale_data . language_aliases ) ;
2021-09-02 17:46:35 -04:00
append_aliases ( alias_object . as_object ( ) . get ( " territoryAlias " sv ) , locale_data . territory_aliases ) ;
2021-08-30 14:56:23 -04:00
append_aliases ( alias_object . as_object ( ) . get ( " scriptAlias " sv ) , locale_data . script_aliases ) ;
append_aliases ( alias_object . as_object ( ) . get ( " variantAlias " sv ) , locale_data . variant_aliases ) ;
append_aliases ( alias_object . as_object ( ) . get ( " subdivisionAlias " sv ) , locale_data . subdivision_aliases ) ;
2021-11-23 10:51:10 -05:00
return { } ;
2021-08-30 14:56:23 -04:00
}
2021-11-23 10:51:10 -05:00
// Reads likelySubtags.json from the given supplemental directory and appends one LanguageMapping
// per entry to locale_data.likely_subtags, updating max_variant_size along the way.
//
// Entries whose key or value cannot be parsed as a language ID are skipped. Errors from opening the
// file or parsing its JSON are propagated via TRY.
static ErrorOr < void > parse_likely_subtags ( String core_supplemental_path , UnicodeLocaleData & locale_data )
2021-08-31 09:40:24 -04:00
{
LexicalPath likely_subtags_path ( move ( core_supplemental_path ) ) ;
likely_subtags_path = likely_subtags_path . append ( " likelySubtags.json " sv ) ;
2021-11-23 10:51:10 -05:00
auto likely_subtags_file = TRY ( Core : : File : : open ( likely_subtags_path . string ( ) , Core : : OpenMode : : ReadOnly ) ) ;
auto likely_subtags = TRY ( JsonValue : : from_string ( likely_subtags_file - > read_all ( ) ) ) ;
2021-08-31 09:40:24 -04:00
2021-11-15 01:46:51 +01:00
auto const & supplemental_object = likely_subtags . as_object ( ) . get ( " supplemental " sv ) ;
2021-08-31 09:40:24 -04:00
auto const & likely_subtags_object = supplemental_object . as_object ( ) . get ( " likelySubtags " sv ) ;
likely_subtags_object . as_object ( ) . for_each_member ( [ & ] ( auto const & key , JsonValue const & value ) {
2021-11-23 11:54:53 -05:00
// On a parse failure the callback returns early, so only this entry is dropped.
auto mapping = TRY_OR_DISCARD ( parse_language_mapping ( locale_data , key , value . as_string ( ) ) ) ;
locale_data . max_variant_size = max ( mapping . key . variants . size ( ) , locale_data . max_variant_size ) ;
locale_data . max_variant_size = max ( mapping . alias . variants . size ( ) , locale_data . max_variant_size ) ;
locale_data . likely_subtags . append ( move ( mapping ) ) ;
2021-08-31 09:40:24 -04:00
} ) ;
2021-11-23 10:51:10 -05:00
return { } ;
2021-08-31 09:40:24 -04:00
}
2021-11-23 10:51:10 -05:00
// Reads the identity section of languages.json under `locale_path` and fills in the identity
// subtags of `locale`.
//
// The locale object is looked up under "main" using the name of the directory that contains the
// file. The language is always stored; territory and variant are stored only when present as
// strings. Each stored subtag is also added to the matching list in locale_data if not already there.
//
// Errors from opening the file or parsing its JSON are propagated via TRY.
static ErrorOr < void > parse_identity ( String locale_path , UnicodeLocaleData & locale_data , Locale & locale )
2021-08-24 19:50:18 -04:00
{
LexicalPath languages_path ( move ( locale_path ) ) ; // Note: Every JSON file defines identity data, so we can use any of them.
languages_path = languages_path . append ( " languages.json " sv ) ;
2021-11-23 10:51:10 -05:00
auto languages_file = TRY ( Core : : File : : open ( languages_path . string ( ) , Core : : OpenMode : : ReadOnly ) ) ;
auto languages = TRY ( JsonValue : : from_string ( languages_file - > read_all ( ) ) ) ;
2021-08-24 19:50:18 -04:00
2021-11-15 01:46:51 +01:00
auto const & main_object = languages . as_object ( ) . get ( " main " sv ) ;
2021-08-24 19:50:18 -04:00
// The key under "main" is the name of the directory holding the JSON file.
auto const & locale_object = main_object . as_object ( ) . get ( languages_path . parent ( ) . basename ( ) ) ;
auto const & identity_object = locale_object . as_object ( ) . get ( " identity " sv ) ;
auto const & language_string = identity_object . as_object ( ) . get ( " language " sv ) ;
auto const & territory_string = identity_object . as_object ( ) . get ( " territory " sv ) ;
auto const & variant_string = identity_object . as_object ( ) . get ( " variant " sv ) ;
locale . language = language_string . as_string ( ) ;
if ( ! locale_data . languages . contains_slow ( locale . language ) )
locale_data . languages . append ( locale . language ) ;
// Territory and variant are optional; they are only recorded when the JSON value is a string.
if ( territory_string . is_string ( ) ) {
locale . territory = territory_string . as_string ( ) ;
if ( ! locale_data . territories . contains_slow ( * locale . territory ) )
locale_data . territories . append ( * locale . territory ) ;
}
if ( variant_string . is_string ( ) ) {
locale . variant = variant_string . as_string ( ) ;
if ( ! locale_data . variants . contains_slow ( * locale . variant ) )
locale_data . variants . append ( * locale . variant ) ;
}
2021-11-23 10:51:10 -05:00
return { } ;
2021-08-24 19:50:18 -04:00
}
2021-11-23 10:51:10 -05:00
// Reads the localized language display names from languages.json under `locale_path` and stores
// them in locale.languages (language code -> unique-string index of the display name).
//
// Only languages already present in locale_data.languages are recorded; all other entries are
// ignored. Errors from opening the file or parsing its JSON are propagated via TRY.
static ErrorOr < void > parse_locale_languages ( String locale_path , UnicodeLocaleData & locale_data , Locale & locale )
2021-08-26 08:17:01 -04:00
{
LexicalPath languages_path ( move ( locale_path ) ) ;
languages_path = languages_path . append ( " languages.json " sv ) ;
2021-11-23 10:51:10 -05:00
auto languages_file = TRY ( Core : : File : : open ( languages_path . string ( ) , Core : : OpenMode : : ReadOnly ) ) ;
auto languages = TRY ( JsonValue : : from_string ( languages_file - > read_all ( ) ) ) ;
2021-08-26 08:17:01 -04:00
2021-11-15 01:46:51 +01:00
auto const & main_object = languages . as_object ( ) . get ( " main " sv ) ;
2021-08-26 08:17:01 -04:00
// The key under "main" is the name of the directory holding the JSON file.
auto const & locale_object = main_object . as_object ( ) . get ( languages_path . parent ( ) . basename ( ) ) ;
auto const & locale_display_names_object = locale_object . as_object ( ) . get ( " localeDisplayNames " sv ) ;
auto const & languages_object = locale_display_names_object . as_object ( ) . get ( " languages " sv ) ;
languages_object . as_object ( ) . for_each_member ( [ & ] ( auto const & key , JsonValue const & value ) {
2021-10-10 11:31:49 -04:00
// Skip display names for languages that are not in the known-languages list.
if ( ! locale_data . languages . contains_slow ( key ) )
return ;
2021-11-12 08:48:21 -05:00
auto index = locale_data . unique_strings . ensure ( value . as_string ( ) ) ;
2021-10-10 11:41:13 -04:00
locale . languages . set ( key , index ) ;
2021-08-26 08:17:01 -04:00
} ) ;
2021-11-23 10:51:10 -05:00
return { } ;
2021-08-26 08:17:01 -04:00
}
2021-11-23 10:51:10 -05:00
// Reads the localized territory display names from territories.json under `locale_path` and stores
// them in locale.territories (territory code -> unique-string index of the display name).
//
// Only territories already present in locale_data.territories are recorded; all other entries are
// ignored. Errors from opening the file or parsing its JSON are propagated via TRY.
static ErrorOr < void > parse_locale_territories ( String locale_path , UnicodeLocaleData & locale_data , Locale & locale )
2021-08-24 19:50:18 -04:00
{
LexicalPath territories_path ( move ( locale_path ) ) ;
territories_path = territories_path . append ( " territories.json " sv ) ;
2021-11-23 10:51:10 -05:00
auto territories_file = TRY ( Core : : File : : open ( territories_path . string ( ) , Core : : OpenMode : : ReadOnly ) ) ;
auto territories = TRY ( JsonValue : : from_string ( territories_file - > read_all ( ) ) ) ;
2021-08-24 19:50:18 -04:00
2021-11-15 01:46:51 +01:00
auto const & main_object = territories . as_object ( ) . get ( " main " sv ) ;
2021-08-24 19:50:18 -04:00
// The key under "main" is the name of the directory holding the JSON file.
auto const & locale_object = main_object . as_object ( ) . get ( territories_path . parent ( ) . basename ( ) ) ;
auto const & locale_display_names_object = locale_object . as_object ( ) . get ( " localeDisplayNames " sv ) ;
auto const & territories_object = locale_display_names_object . as_object ( ) . get ( " territories " sv ) ;
territories_object . as_object ( ) . for_each_member ( [ & ] ( auto const & key , JsonValue const & value ) {
2021-10-10 11:31:49 -04:00
// Skip display names for territories that are not in the known-territories list.
if ( ! locale_data . territories . contains_slow ( key ) )
return ;
2021-11-12 08:48:21 -05:00
auto index = locale_data . unique_strings . ensure ( value . as_string ( ) ) ;
2021-10-10 11:41:13 -04:00
locale . territories . set ( key , index ) ;
2021-08-24 19:50:18 -04:00
} ) ;
2021-11-23 10:51:10 -05:00
return { } ;
2021-08-24 19:50:18 -04:00
}
2021-11-23 10:51:10 -05:00
// Reads the localized script display names from scripts.json under `locale_path` and stores them in
// locale.scripts (script code -> unique-string index of the display name).
//
// Unlike the language and territory parsers, every entry is recorded, and each script code is also
// added to locale_data.scripts if not already present. Errors from opening the file or parsing its
// JSON are propagated via TRY.
static ErrorOr < void > parse_locale_scripts ( String locale_path , UnicodeLocaleData & locale_data , Locale & locale )
2021-08-26 08:29:39 -04:00
{
LexicalPath scripts_path ( move ( locale_path ) ) ;
scripts_path = scripts_path . append ( " scripts.json " sv ) ;
2021-11-23 10:51:10 -05:00
auto scripts_file = TRY ( Core : : File : : open ( scripts_path . string ( ) , Core : : OpenMode : : ReadOnly ) ) ;
auto scripts = TRY ( JsonValue : : from_string ( scripts_file - > read_all ( ) ) ) ;
2021-08-26 08:29:39 -04:00
2021-11-15 01:46:51 +01:00
auto const & main_object = scripts . as_object ( ) . get ( " main " sv ) ;
2021-08-26 08:29:39 -04:00
// The key under "main" is the name of the directory holding the JSON file.
auto const & locale_object = main_object . as_object ( ) . get ( scripts_path . parent ( ) . basename ( ) ) ;
auto const & locale_display_names_object = locale_object . as_object ( ) . get ( " localeDisplayNames " sv ) ;
auto const & scripts_object = locale_display_names_object . as_object ( ) . get ( " scripts " sv ) ;
scripts_object . as_object ( ) . for_each_member ( [ & ] ( auto const & key , JsonValue const & value ) {
2021-11-12 08:48:21 -05:00
auto index = locale_data . unique_strings . ensure ( value . as_string ( ) ) ;
2021-10-10 11:41:13 -04:00
locale . scripts . set ( key , index ) ;
2021-08-26 08:29:39 -04:00
if ( ! locale_data . scripts . contains_slow ( key ) )
locale_data . scripts . append ( key ) ;
} ) ;
2021-11-23 10:51:10 -05:00
return { } ;
2021-08-26 08:29:39 -04:00
}
2021-11-23 10:51:10 -05:00
// Reads listPatterns.json under `misc_path` and appends one ListPatterns entry per member of the
// "listPatterns" object to locale.list_patterns.
//
// The type and style of each entry are derived from substrings of the member's key. The four pattern
// strings ("start", "middle", "end", "2") are interned in unique_strings. Newly seen types and styles
// are added to locale_data.list_pattern_types / list_pattern_styles.
//
// Errors from opening the file or parsing its JSON are propagated via TRY.
static ErrorOr < void > parse_locale_list_patterns ( String misc_path , UnicodeLocaleData & locale_data , Locale & locale )
2021-09-06 13:56:44 -04:00
{
LexicalPath list_patterns_path ( move ( misc_path ) ) ;
list_patterns_path = list_patterns_path . append ( " listPatterns.json " sv ) ;
2021-11-23 10:51:10 -05:00
auto list_patterns_file = TRY ( Core : : File : : open ( list_patterns_path . string ( ) , Core : : OpenMode : : ReadOnly ) ) ;
auto list_patterns = TRY ( JsonValue : : from_string ( list_patterns_file - > read_all ( ) ) ) ;
2021-09-06 13:56:44 -04:00
2021-11-15 01:46:51 +01:00
auto const & main_object = list_patterns . as_object ( ) . get ( " main " sv ) ;
2021-09-06 13:56:44 -04:00
// The key under "main" is the name of the directory holding the JSON file.
auto const & locale_object = main_object . as_object ( ) . get ( list_patterns_path . parent ( ) . basename ( ) ) ;
auto const & list_patterns_object = locale_object . as_object ( ) . get ( " listPatterns " sv ) ;
// Maps a CLDR list-pattern key to its type name; a key matching none of the three known
// types hits VERIFY_NOT_REACHED.
auto list_pattern_type = [ ] ( StringView key ) {
if ( key . contains ( " type-standard " sv ) )
return " conjunction " sv ;
if ( key . contains ( " type-or " sv ) )
return " disjunction " sv ;
if ( key . contains ( " type-unit " sv ) )
return " unit " sv ;
VERIFY_NOT_REACHED ( ) ;
} ;
// Maps a CLDR list-pattern key to its style; keys naming neither short nor narrow are "long".
auto list_pattern_style = [ ] ( StringView key ) {
if ( key . contains ( " short " sv ) )
return " short " sv ;
if ( key . contains ( " narrow " sv ) )
return " narrow " sv ;
return " long " sv ;
} ;
list_patterns_object . as_object ( ) . for_each_member ( [ & ] ( auto const & key , JsonValue const & value ) {
auto type = list_pattern_type ( key ) ;
auto style = list_pattern_style ( key ) ;
2021-11-12 08:48:21 -05:00
auto start = locale_data . unique_strings . ensure ( value . as_object ( ) . get ( " start " sv ) . as_string ( ) ) ;
auto middle = locale_data . unique_strings . ensure ( value . as_object ( ) . get ( " middle " sv ) . as_string ( ) ) ;
auto end = locale_data . unique_strings . ensure ( value . as_object ( ) . get ( " end " sv ) . as_string ( ) ) ;
auto pair = locale_data . unique_strings . ensure ( value . as_object ( ) . get ( " 2 " sv ) . as_string ( ) ) ;
2021-09-06 13:56:44 -04:00
if ( ! locale_data . list_pattern_types . contains_slow ( type ) )
locale_data . list_pattern_types . append ( type ) ;
if ( ! locale_data . list_pattern_styles . contains_slow ( style ) )
locale_data . list_pattern_styles . append ( style ) ;
locale . list_patterns . append ( { move ( type ) , move ( style ) , move ( start ) , move ( middle ) , move ( end ) , move ( pair ) } ) ;
} ) ;
2021-11-23 10:51:10 -05:00
return { } ;
2021-09-06 13:56:44 -04:00
}
2021-11-23 10:51:10 -05:00
// Reads currencies.json under `numbers_path` and records, for every currency code, four localized
// names in `locale`:
//   long    <- "displayName"
//   short   <- "symbol"
//   narrow  <- "symbol-alt-narrow" (index 0 is stored when the field is absent)
//   numeric <- "displayName-count-other" (falls back to "displayName" when absent)
// Each currency code is also added to locale_data.currencies if not already present.
//
// Errors from opening the file or parsing its JSON are propagated via TRY.
static ErrorOr < void > parse_locale_currencies ( String numbers_path , UnicodeLocaleData & locale_data , Locale & locale )
2021-08-24 19:50:18 -04:00
{
2021-08-26 08:38:54 -04:00
LexicalPath currencies_path ( move ( numbers_path ) ) ;
currencies_path = currencies_path . append ( " currencies.json " sv ) ;
2021-11-23 10:51:10 -05:00
auto currencies_file = TRY ( Core : : File : : open ( currencies_path . string ( ) , Core : : OpenMode : : ReadOnly ) ) ;
auto currencies = TRY ( JsonValue : : from_string ( currencies_file - > read_all ( ) ) ) ;
2021-08-26 08:38:54 -04:00
2021-11-15 01:46:51 +01:00
auto const & main_object = currencies . as_object ( ) . get ( " main " sv ) ;
2021-08-26 08:38:54 -04:00
// The key under "main" is the name of the directory holding the JSON file.
auto const & locale_object = main_object . as_object ( ) . get ( currencies_path . parent ( ) . basename ( ) ) ;
auto const & locale_numbers_object = locale_object . as_object ( ) . get ( " numbers " sv ) ;
auto const & currencies_object = locale_numbers_object . as_object ( ) . get ( " currencies " sv ) ;
currencies_object . as_object ( ) . for_each_member ( [ & ] ( auto const & key , JsonValue const & value ) {
LibJS+LibUnicode: Generate all styles of currency localizations
Currently, LibUnicode is only parsing and generating the "long" style of
currency display names. However, the CLDR contains "short" and "narrow"
forms as well that need to be handled. Parse these, and update LibJS to
actually respect the "style" option provided by the user for displaying
currencies with Intl.DisplayNames.
Note: There are some discrepencies between the engines on how style is
handled. In particular, running:
new Intl.DisplayNames('en', {type:'currency', style:'narrow'}).of('usd')
Gives:
SpiderMoney: "USD"
V8: "US Dollar"
LibJS: "$"
And running:
new Intl.DisplayNames('en', {type:'currency', style:'short'}).of('usd')
Gives:
SpiderMonkey: "$"
V8: "US Dollar"
LibJS: "$"
My best guess is V8 isn't handling style, and just returning the long
form (which is what LibJS did before this commit). And SpiderMoney can
handle some styles, but if they don't have a value for the requested
style, they fall back to the canonicalized code passed into of().
2021-11-12 13:11:30 -05:00
auto const & long_name = value . as_object ( ) . get ( " displayName " sv ) ;
auto const & short_name = value . as_object ( ) . get ( " symbol " sv ) ;
auto const & narrow_name = value . as_object ( ) . get ( " symbol-alt-narrow " sv ) ;
auto const & numeric_name = value . as_object ( ) . get ( " displayName-count-other " sv ) ;
2021-10-10 11:41:13 -04:00
LibJS+LibUnicode: Generate all styles of currency localizations
Currently, LibUnicode is only parsing and generating the "long" style of
currency display names. However, the CLDR contains "short" and "narrow"
forms as well that need to be handled. Parse these, and update LibJS to
actually respect the "style" option provided by the user for displaying
currencies with Intl.DisplayNames.
Note: There are some discrepencies between the engines on how style is
handled. In particular, running:
new Intl.DisplayNames('en', {type:'currency', style:'narrow'}).of('usd')
Gives:
SpiderMoney: "USD"
V8: "US Dollar"
LibJS: "$"
And running:
new Intl.DisplayNames('en', {type:'currency', style:'short'}).of('usd')
Gives:
SpiderMonkey: "$"
V8: "US Dollar"
LibJS: "$"
My best guess is V8 isn't handling style, and just returning the long
form (which is what LibJS did before this commit). And SpiderMoney can
handle some styles, but if they don't have a value for the requested
style, they fall back to the canonicalized code passed into of().
2021-11-12 13:11:30 -05:00
locale . long_currencies . set ( key , locale_data . unique_strings . ensure ( long_name . as_string ( ) ) ) ;
locale . short_currencies . set ( key , locale_data . unique_strings . ensure ( short_name . as_string ( ) ) ) ;
// The narrow and numeric forms are optional in the data; see the fallbacks described above.
locale . narrow_currencies . set ( key , narrow_name . is_null ( ) ? 0 : locale_data . unique_strings . ensure ( narrow_name . as_string ( ) ) ) ;
locale . numeric_currencies . set ( key , locale_data . unique_strings . ensure ( numeric_name . is_null ( ) ? long_name . as_string ( ) : numeric_name . as_string ( ) ) ) ;
2021-10-10 11:41:13 -04:00
2021-08-26 08:38:54 -04:00
if ( ! locale_data . currencies . contains_slow ( key ) )
locale_data . currencies . append ( key ) ;
} ) ;
2021-11-23 10:51:10 -05:00
return { } ;
2021-08-26 08:38:54 -04:00
}
2021-11-23 10:51:10 -05:00
// Reads numbers.json under `locale_numbers_path` and stores the locale's numbering systems as the
// value of the "nu" keyword.
//
// The value is a comma-joined, de-duplicated list built in this order: the default numbering
// system, the values of "otherNumberingSystems", then the values of any member whose key starts
// with "defaultNumberingSystem-alt-". The joined string is interned in unique_strings, stored in
// locale.keywords, and the keyword name is added to locale_data.keywords if not already present.
//
// Errors from opening the file or parsing its JSON are propagated via TRY.
static ErrorOr < void > parse_numeric_keywords ( String locale_numbers_path , UnicodeLocaleData & locale_data , Locale & locale )
2021-09-10 09:56:11 -04:00
{
static constexpr StringView key = " nu " sv ;
LexicalPath numbers_path ( move ( locale_numbers_path ) ) ;
numbers_path = numbers_path . append ( " numbers.json " sv ) ;
2021-11-23 10:51:10 -05:00
auto numbers_file = TRY ( Core : : File : : open ( numbers_path . string ( ) , Core : : OpenMode : : ReadOnly ) ) ;
auto numbers = TRY ( JsonValue : : from_string ( numbers_file - > read_all ( ) ) ) ;
2021-09-10 09:56:11 -04:00
2021-11-15 01:46:51 +01:00
auto const & main_object = numbers . as_object ( ) . get ( " main " sv ) ;
2021-09-10 09:56:11 -04:00
// The key under "main" is the name of the directory holding the JSON file.
auto const & locale_object = main_object . as_object ( ) . get ( numbers_path . parent ( ) . basename ( ) ) ;
auto const & locale_numbers_object = locale_object . as_object ( ) . get ( " numbers " sv ) ;
auto const & default_numbering_system_object = locale_numbers_object . as_object ( ) . get ( " defaultNumberingSystem " sv ) ;
auto const & other_numbering_systems_object = locale_numbers_object . as_object ( ) . get ( " otherNumberingSystems " sv ) ;
// The default numbering system always comes first in the list.
Vector < String > keyword_values { } ;
keyword_values . append ( default_numbering_system_object . as_string ( ) ) ;
other_numbering_systems_object . as_object ( ) . for_each_member ( [ & ] ( auto const & , JsonValue const & value ) {
auto keyword_value = value . as_string ( ) ;
if ( ! keyword_values . contains_slow ( keyword_value ) )
keyword_values . append ( move ( keyword_value ) ) ;
} ) ;
2021-11-10 14:49:03 -05:00
// Note: the lambda parameter `key` below shadows the function-level keyword name `key`.
locale_numbers_object . as_object ( ) . for_each_member ( [ & ] ( auto const & key , JsonValue const & value ) {
if ( ! key . starts_with ( " defaultNumberingSystem-alt- " sv ) )
return ;
auto keyword_value = value . as_string ( ) ;
if ( ! keyword_values . contains_slow ( keyword_value ) )
keyword_values . append ( move ( keyword_value ) ) ;
} ) ;
2021-09-10 09:56:11 -04:00
StringBuilder builder ;
builder . join ( ' , ' , keyword_values ) ;
2021-10-10 11:41:13 -04:00
2021-11-12 08:48:21 -05:00
auto index = locale_data . unique_strings . ensure ( builder . build ( ) ) ;
2021-10-10 11:41:13 -04:00
locale . keywords . set ( key , index ) ;
2021-09-10 09:56:11 -04:00
if ( ! locale_data . keywords . contains_slow ( key ) )
locale_data . keywords . append ( key ) ;
2021-11-23 10:51:10 -05:00
return { } ;
2021-09-10 09:56:11 -04:00
}
2021-11-28 10:39:55 -05:00
// Collects the calendars available for a locale and stores them as the value of the "ca" keyword.
//
// Every entry of the `locale_dates_path` directory whose basename starts with "ca-" is read as JSON,
// and each member name of its "calendars" object contributes one value: "generic" is skipped,
// "gregorian" is recorded as "gregory", and every other name is recorded unchanged. Values are not
// de-duplicated here. The comma-joined list is interned in unique_strings, stored in
// locale.keywords, and the keyword name is added to locale_data.keywords if not already present.
//
// Errors from directory iteration, opening a file, or parsing its JSON are propagated via TRY.
static ErrorOr < void > parse_calendar_keywords ( String locale_dates_path , UnicodeLocaleData & locale_data , Locale & locale )
{
static constexpr StringView key = " ca " sv ;
auto calendars_iterator = TRY ( path_to_dir_iterator ( locale_dates_path , { } ) ) ;
Vector < String > keyword_values { } ;
while ( calendars_iterator . has_next ( ) ) {
auto locale_calendars_path = TRY ( next_path_from_dir_iterator ( calendars_iterator ) ) ;
LexicalPath calendars_path ( move ( locale_calendars_path ) ) ;
// Only the per-calendar files (prefixed "ca-") are of interest.
if ( ! calendars_path . basename ( ) . starts_with ( " ca- " sv ) )
continue ;
auto calendars_file = TRY ( Core : : File : : open ( calendars_path . string ( ) , Core : : OpenMode : : ReadOnly ) ) ;
auto calendars = TRY ( JsonValue : : from_string ( calendars_file - > read_all ( ) ) ) ;
auto const & main_object = calendars . as_object ( ) . get ( " main " sv ) ;
// The key under "main" is the name of the directory holding the JSON file.
auto const & locale_object = main_object . as_object ( ) . get ( calendars_path . parent ( ) . basename ( ) ) ;
auto const & dates_object = locale_object . as_object ( ) . get ( " dates " sv ) ;
auto const & calendars_object = dates_object . as_object ( ) . get ( " calendars " sv ) ;
calendars_object . as_object ( ) . for_each_member ( [ & ] ( auto const & calendar_name , JsonValue const & ) {
2021-11-30 21:23:13 -05:00
// The generic calendar is not a supported Unicode calendar key, so skip it:
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Locale/calendar#unicode_calendar_keys
if ( calendar_name = = " generic " sv )
return ;
2021-11-28 10:39:55 -05:00
// FIXME: Similar to the calendar aliases defined in GenerateUnicodeDateTimeFormat, this
// should be parsed from BCP47. https://unicode-org.atlassian.net/browse/CLDR-15158
if ( calendar_name = = " gregorian " sv )
keyword_values . append ( " gregory " sv ) ;
2021-11-30 21:23:13 -05:00
else
keyword_values . append ( calendar_name ) ;
2021-11-28 10:39:55 -05:00
} ) ;
}
StringBuilder builder ;
builder . join ( ' , ' , keyword_values ) ;
auto index = locale_data . unique_strings . ensure ( builder . build ( ) ) ;
locale . keywords . set ( key , index ) ;
if ( ! locale_data . keywords . contains_slow ( key ) )
locale_data . keywords . append ( key ) ;
return { } ;
}
2021-11-23 10:51:10 -05:00
// Reads defaultContent.json under `core_path` and records locale aliases for its entries.
//
// For each listed locale, trailing dash-separated subtags are stripped one at a time until the
// remainder names a locale present in locale_data.locales. If no such locale is found the entry is
// ignored. If one is found and it differs from the full entry, an alias pairing the known locale
// with the full entry is appended to locale_data.locale_aliases.
//
// Errors from opening the file or parsing its JSON are propagated via TRY.
static ErrorOr < void > parse_default_content_locales ( String core_path , UnicodeLocaleData & locale_data )
2021-11-17 08:20:19 -05:00
{
LexicalPath default_content_path ( move ( core_path ) ) ;
default_content_path = default_content_path . append ( " defaultContent.json " sv ) ;
2021-11-23 10:51:10 -05:00
auto default_content_file = TRY ( Core : : File : : open ( default_content_path . string ( ) , Core : : OpenMode : : ReadOnly ) ) ;
auto default_content = TRY ( JsonValue : : from_string ( default_content_file - > read_all ( ) ) ) ;
2021-11-17 08:20:19 -05:00
auto const & default_content_array = default_content . as_object ( ) . get ( " defaultContent " sv ) ;
default_content_array . as_array ( ) . for_each ( [ & ] ( JsonValue const & value ) {
auto locale = value . as_string ( ) ;
// `default_locale` is a view into `locale` that is shortened from the right on each iteration.
StringView default_locale = locale ;
while ( true ) {
if ( locale_data . locales . contains ( default_locale ) )
break ;
auto pos = default_locale . find_last ( ' - ' ) ;
// No separator left and still no known locale: nothing to alias for this entry.
if ( ! pos . has_value ( ) )
return ;
default_locale = default_locale . substring_view ( 0 , * pos ) ;
}
if ( default_locale ! = locale )
locale_data . locale_aliases . append ( { default_locale , move ( locale ) } ) ;
} ) ;
2021-11-23 10:51:10 -05:00
return { } ;
2021-11-17 08:20:19 -05:00
}
2021-11-23 11:54:53 -05:00
// Adds locale aliases so that every locale with language, script and region subtags is also
// reachable by the corresponding "language-region" name without the script.
//
// Runs over every known locale and then over every existing alias. A failure to parse any of those
// names as a language ID is propagated to the caller via TRY.
static ErrorOr < void > define_aliases_without_scripts ( UnicodeLocaleData & locale_data )
2021-11-17 09:56:16 -05:00
{
// From ECMA-402: https://tc39.es/ecma402/#sec-internal-slots
//
// For locales that include a script subtag in addition to language and region, the
// corresponding locale without a script subtag must also be supported.
//
// So we define aliases for locales that contain all three subtags, but we must also take
// care to handle when the locale itself or the locale without a script subtag are an alias
// by way of default-content locales.
// Finds the entry in locale_aliases whose `alias` field equals `locale`, or end() if none.
auto find_alias = [ & ] ( auto const & locale ) {
return locale_data . locale_aliases . find_if ( [ & ] ( auto const & alias ) { return locale = = alias . alias ; } ) ;
} ;
2021-11-23 11:54:53 -05:00
auto append_alias_without_script = [ & ] ( auto const & locale ) - > ErrorOr < void > {
auto parsed_locale = TRY ( CanonicalLanguageID < StringIndexType > : : parse ( locale_data . unique_strings , locale ) ) ;
// Only locales that carry all three of language, script and region need a script-less alias.
if ( ( parsed_locale . language = = 0 ) | | ( parsed_locale . script = = 0 ) | | ( parsed_locale . region = = 0 ) )
return { } ;
2021-11-17 09:56:16 -05:00
auto locale_without_script = String : : formatted ( " {}-{} " ,
2021-11-23 11:54:53 -05:00
locale_data . unique_strings . get ( parsed_locale . language ) ,
locale_data . unique_strings . get ( parsed_locale . region ) ) ;
2021-11-17 09:56:16 -05:00
// Nothing to do if the script-less name is already a real locale or already an alias.
if ( locale_data . locales . contains ( locale_without_script ) )
2021-11-23 11:54:53 -05:00
return { } ;
2021-11-17 09:56:16 -05:00
if ( find_alias ( locale_without_script ) ! = locale_data . locale_aliases . end ( ) )
2021-11-23 11:54:53 -05:00
return { } ;
2021-11-17 09:56:16 -05:00
// If `locale` is itself an alias, point the new alias at that alias's target instead.
if ( auto it = find_alias ( locale ) ; it ! = locale_data . locale_aliases . end ( ) )
locale_data . locale_aliases . append ( { it - > name , locale_without_script } ) ;
else
locale_data . locale_aliases . append ( { locale , locale_without_script } ) ;
2021-11-23 11:54:53 -05:00
return { } ;
2021-11-17 09:56:16 -05:00
} ;
for ( auto const & locale : locale_data . locales )
2021-11-23 11:54:53 -05:00
TRY ( append_alias_without_script ( locale . key ) ) ;
2021-11-17 09:56:16 -05:00
// NOTE(review): this loop iterates locale_aliases while append_alias_without_script may append
// to that same vector; confirm the container's iterators and the `locale.alias` reference stay
// valid when the vector grows during iteration.
for ( auto const & locale : locale_data . locale_aliases )
2021-11-23 11:54:53 -05:00
TRY ( append_alias_without_script ( locale . alias ) ) ;
return { } ;
2021-11-17 09:56:16 -05:00
}
2021-11-28 10:39:55 -05:00
// Parses every CLDR JSON directory handed to the generator into `locale_data`.
//
// Steps, in order: open all per-locale directory iterators, parse the core
// "supplemental" data (aliases and likely subtags), parse each per-locale
// directory, parse the default-content locale list, and finally derive the
// aliases without scripts. The first error from any step is returned to the caller.
static ErrorOr<void> parse_all_locales(String core_path, String locale_names_path, String misc_path, String numbers_path, String dates_path, UnicodeLocaleData& locale_data)
{
    // The locale names directory is iterated twice (identity data, then display
    // names), so the path is copied for the first iterator and moved into the second.
    auto identity_iterator = TRY(path_to_dir_iterator(locale_names_path));
    auto locale_names_iterator = TRY(path_to_dir_iterator(move(locale_names_path)));
    auto misc_iterator = TRY(path_to_dir_iterator(move(misc_path)));
    auto numbers_iterator = TRY(path_to_dir_iterator(move(numbers_path)));
    auto dates_iterator = TRY(path_to_dir_iterator(move(dates_path)));

    auto const supplemental_directory = LexicalPath(core_path).append("supplemental"sv);
    VERIFY(Core::File::is_directory(supplemental_directory.string()));

    TRY(parse_core_aliases(supplemental_directory.string(), locale_data));
    TRY(parse_likely_subtags(supplemental_directory.string(), locale_data));

    // Reduces a locale path to "language[-script][-region]" built from the parsed
    // basename; variants are never appended.
    auto locale_name_without_variants = [&](String path) -> ErrorOr<String> {
        auto language_id = TRY(CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, LexicalPath::basename(path)));

        StringBuilder name;
        name.append(locale_data.unique_strings.get(language_id.language));

        // Script and region are optional; an empty unique string means "not present".
        auto append_subtag = [&](auto string_index) {
            auto subtag = locale_data.unique_strings.get(string_index);
            if (!subtag.is_empty())
                name.appendff("-{}", subtag);
        };
        append_subtag(language_id.script);
        append_subtag(language_id.region);

        return name.build();
    };

    // Drains one directory iterator, handing each entry (and the Locale it maps
    // to, created on demand) to `parse_one`.
    auto parse_each_locale = [&](auto& iterator, auto parse_one) -> ErrorOr<void> {
        while (iterator.has_next()) {
            auto path = TRY(next_path_from_dir_iterator(iterator));
            auto locale_name = TRY(locale_name_without_variants(path));

            auto& locale = locale_data.locales.ensure(locale_name);
            TRY(parse_one(path, locale));
        }

        return {};
    };

    TRY(parse_each_locale(identity_iterator, [&](auto const& path, Locale& locale) -> ErrorOr<void> {
        TRY(parse_identity(path, locale_data, locale));
        return {};
    }));

    TRY(parse_each_locale(locale_names_iterator, [&](auto const& path, Locale& locale) -> ErrorOr<void> {
        TRY(parse_locale_languages(path, locale_data, locale));
        TRY(parse_locale_territories(path, locale_data, locale));
        TRY(parse_locale_scripts(path, locale_data, locale));
        return {};
    }));

    TRY(parse_each_locale(misc_iterator, [&](auto const& path, Locale& locale) -> ErrorOr<void> {
        TRY(parse_locale_list_patterns(path, locale_data, locale));
        return {};
    }));

    TRY(parse_each_locale(numbers_iterator, [&](auto const& path, Locale& locale) -> ErrorOr<void> {
        TRY(parse_locale_currencies(path, locale_data, locale));
        TRY(parse_numeric_keywords(path, locale_data, locale));
        return {};
    }));

    TRY(parse_each_locale(dates_iterator, [&](auto const& path, Locale& locale) -> ErrorOr<void> {
        TRY(parse_calendar_keywords(path, locale_data, locale));
        return {};
    }));

    // These two run last, after every per-locale directory has been parsed.
    TRY(parse_default_content_locales(move(core_path), locale_data));
    TRY(define_aliases_without_scripts(locale_data));

    return {};
}
2021-08-26 06:42:11 -04:00
// Turns a CLDR identifier into a name usable as a C++ enumerator.
//
// - Every '-' is replaced with '_'.
// - An identifier made up only of ASCII digits is prefixed with the first
//   character of `owner` and an underscore; `owner` must be non-empty on this path.
// - Otherwise, a leading lowercase ASCII letter is upper-cased.
// - Anything else is returned as-is (after the '-' replacement).
static String format_identifier(StringView owner, String identifier)
{
    identifier = identifier.replace("-"sv, "_"sv, true);

    // Keep this check first: the branches below index into `identifier`.
    bool const is_numeric = all_of(identifier, is_ascii_digit);
    if (is_numeric)
        return String::formatted("{}_{}", owner[0], identifier);

    auto const leading = identifier[0];
    if (!is_ascii_lower_alpha(leading))
        return identifier;

    return String::formatted("{:c}{}", to_ascii_uppercase(leading), identifier.substring_view(1));
}
// Writes the generated locale header to `file`.
// The text is assembled in a SourceGenerator from three parts: a fixed preamble
// (pragma, includes, opening of namespace Unicode), one generate_enum() call per
// identifier category held in `locale_data`, and the declarations that live in
// namespace Detail. Nothing is returned; a failed write aborts via VERIFY.
static void generate_unicode_locale_header ( Core : : File & file , UnicodeLocaleData & locale_data )
{
StringBuilder builder ;
SourceGenerator generator { builder } ;
// Fixed preamble of the generated header.
generator . append ( R " ~~~(
# pragma once
# include <AK/Optional.h>
2021-08-26 06:56:17 -04:00
# include <AK/StringView.h>
2021-08-24 19:50:18 -04:00
# include <AK/Types.h>
2021-10-25 12:15:10 -04:00
# include <AK/Vector.h>
2021-08-24 19:50:18 -04:00
# include <LibUnicode/Forward.h>
namespace Unicode {
) ~ ~ ~ " );
2021-08-26 06:42:11 -04:00
// Locale is the only enum that also receives a None argument and the locale alias list;
// every other category passes an empty second name. NOTE(review): None is presumably the
// default enumerator emitted by generate_enum - confirm in GeneratorUtil.h.
auto locales = locale_data . locales . keys ( ) ;
2021-11-17 08:20:19 -05:00
generate_enum ( generator , format_identifier , " Locale " sv , " None " sv , locales , locale_data . locale_aliases ) ;
2021-11-12 08:48:21 -05:00
generate_enum ( generator , format_identifier , " Language " sv , { } , locale_data . languages ) ;
generate_enum ( generator , format_identifier , " Territory " sv , { } , locale_data . territories ) ;
generate_enum ( generator , format_identifier , " ScriptTag " sv , { } , locale_data . scripts ) ;
generate_enum ( generator , format_identifier , " Currency " sv , { } , locale_data . currencies ) ;
generate_enum ( generator , format_identifier , " Key " sv , { } , locale_data . keywords ) ;
generate_enum ( generator , format_identifier , " Variant " sv , { } , locale_data . variants ) ;
generate_enum ( generator , format_identifier , " ListPatternType " sv , { } , locale_data . list_pattern_types ) ;
generate_enum ( generator , format_identifier , " ListPatternStyle " sv , { } , locale_data . list_pattern_styles ) ;
2021-08-24 19:50:18 -04:00
// Declarations only: this block emits prototypes for namespace Detail and then closes
// both namespaces. No function bodies are written by this function.
generator . append ( R " ~~~(
namespace Detail {
2021-11-11 00:55:02 +01:00
Optional < Locale > locale_from_string ( StringView locale ) ;
2021-08-26 08:17:01 -04:00
Optional < StringView > get_locale_language_mapping ( StringView locale , StringView language ) ;
2021-11-11 00:55:02 +01:00
Optional < Language > language_from_string ( StringView language ) ;
Optional < StringView > resolve_language_alias ( StringView language ) ;
2021-08-26 06:56:17 -04:00
Optional < StringView > get_locale_territory_mapping ( StringView locale , StringView territory ) ;
2021-11-11 00:55:02 +01:00
Optional < Territory > territory_from_string ( StringView territory ) ;
Optional < StringView > resolve_territory_alias ( StringView territory ) ;
2021-08-24 19:50:18 -04:00
2021-08-26 08:29:39 -04:00
Optional < StringView > get_locale_script_tag_mapping ( StringView locale , StringView script_tag ) ;
2021-11-11 00:55:02 +01:00
Optional < ScriptTag > script_tag_from_string ( StringView script_tag ) ;
Optional < StringView > resolve_script_tag_alias ( StringView script_tag ) ;
2021-08-26 08:29:39 -04:00
LibJS+LibUnicode: Generate all styles of currency localizations
Currently, LibUnicode is only parsing and generating the "long" style of
currency display names. However, the CLDR contains "short" and "narrow"
forms as well that need to be handled. Parse these, and update LibJS to
actually respect the "style" option provided by the user for displaying
currencies with Intl.DisplayNames.
Note: There are some discrepencies between the engines on how style is
handled. In particular, running:
new Intl.DisplayNames('en', {type:'currency', style:'narrow'}).of('usd')
Gives:
SpiderMoney: "USD"
V8: "US Dollar"
LibJS: "$"
And running:
new Intl.DisplayNames('en', {type:'currency', style:'short'}).of('usd')
Gives:
SpiderMonkey: "$"
V8: "US Dollar"
LibJS: "$"
My best guess is V8 isn't handling style, and just returning the long
form (which is what LibJS did before this commit). And SpiderMoney can
handle some styles, but if they don't have a value for the requested
style, they fall back to the canonicalized code passed into of().
2021-11-12 13:11:30 -05:00
Optional < StringView > get_locale_long_currency_mapping ( StringView locale , StringView currency ) ;
Optional < StringView > get_locale_short_currency_mapping ( StringView locale , StringView currency ) ;
Optional < StringView > get_locale_narrow_currency_mapping ( StringView locale , StringView currency ) ;
Optional < StringView > get_locale_numeric_currency_mapping ( StringView locale , StringView currency ) ;
2021-11-11 00:55:02 +01:00
Optional < Currency > currency_from_string ( StringView currency ) ;
2021-08-26 08:38:54 -04:00
2021-09-10 09:56:11 -04:00
Optional < StringView > get_locale_key_mapping ( StringView locale , StringView key ) ;
2021-11-11 00:55:02 +01:00
Optional < Key > key_from_string ( StringView key ) ;
2021-09-10 09:56:11 -04:00
2021-09-06 13:56:44 -04:00
Optional < ListPatterns > get_locale_list_pattern_mapping ( StringView locale , StringView list_pattern_type , StringView list_pattern_style ) ;
2021-11-11 00:55:02 +01:00
Optional < ListPatternType > list_pattern_type_from_string ( StringView list_pattern_type ) ;
Optional < ListPatternStyle > list_pattern_style_from_string ( StringView list_pattern_style ) ;
2021-09-06 13:56:44 -04:00
2021-11-11 00:55:02 +01:00
Optional < StringView > resolve_variant_alias ( StringView variant ) ;
Optional < StringView > resolve_subdivision_alias ( StringView subdivision ) ;
2021-08-30 14:56:23 -04:00
2021-08-31 10:03:49 -04:00
void resolve_complex_language_aliases ( Unicode : : LanguageID & language_id ) ;
2021-09-02 18:21:42 -04:00
Optional < Unicode : : LanguageID > add_likely_subtags ( Unicode : : LanguageID const & language_id ) ;
2021-08-31 09:40:24 -04:00
Optional < String > resolve_most_likely_territory ( Unicode : : LanguageID const & language_id ) ;
2021-08-31 10:03:49 -04:00
2021-08-24 19:50:18 -04:00
}
}
) ~ ~ ~ " );
2021-11-23 10:51:10 -05:00
// The function returns void, so a failed write is fatal (VERIFY) instead of being reported.
VERIFY ( file . write ( generator . as_string_view ( ) ) ) ;
2021-08-24 19:50:18 -04:00
}
static void generate_unicode_locale_implementation ( Core : : File & file , UnicodeLocaleData & locale_data )
{
StringBuilder builder ;
SourceGenerator generator { builder } ;
2021-10-14 18:31:09 -04:00
generator . set ( " string_index_type " sv , s_string_index_type ) ;
2021-08-24 19:50:18 -04:00
generator . set ( " locales_size " sv , String : : number ( locale_data . locales . size ( ) ) ) ;
generator . set ( " territories_size " , String : : number ( locale_data . territories . size ( ) ) ) ;
2021-09-02 17:46:35 -04:00
generator . set ( " variants_size " , String : : number ( locale_data . max_variant_size ) ) ;
2021-08-24 19:50:18 -04:00
generator . append ( R " ~~~(
# include <AK/Array.h>
2021-10-12 12:22:47 -04:00
# include <AK/BinarySearch.h>
2021-08-26 06:56:17 -04:00
# include <AK/Span.h>
2021-08-31 10:03:49 -04:00
# include <LibUnicode/Locale.h>
2021-08-24 19:50:18 -04:00
# include <LibUnicode/UnicodeLocale.h>
2021-10-25 12:15:10 -04:00
namespace Unicode : : Detail {
2021-09-06 13:56:44 -04:00
struct Patterns {
ListPatternType type ;
ListPatternStyle style ;
2021-10-14 18:31:09 -04:00
@ string_index_type @ start { 0 } ;
@ string_index_type @ middle { 0 } ;
@ string_index_type @ end { 0 } ;
@ string_index_type @ pair { 0 } ;
2021-09-06 13:56:44 -04:00
} ;
2021-10-10 11:41:13 -04:00
) ~ ~ ~ " );
2021-11-12 08:48:21 -05:00
locale_data . unique_strings . generate ( generator ) ;
2021-10-10 11:41:13 -04:00
2021-10-14 18:31:09 -04:00
auto append_index = [ & ] ( auto index ) {
2021-10-10 13:56:33 -04:00
generator . append ( String : : formatted ( " , {} " , index ) ) ;
2021-09-02 17:46:35 -04:00
} ;
auto append_list_and_size = [ & ] ( auto const & list ) {
if ( list . is_empty ( ) ) {
generator . append ( " , {}, 0 " ) ;
return ;
}
bool first = true ;
generator . append ( " , { " ) ;
for ( auto const & item : list ) {
generator . append ( first ? " " : " , " ) ;
2021-10-10 13:56:33 -04:00
generator . append ( String : : number ( item ) ) ;
2021-09-02 17:46:35 -04:00
first = false ;
}
generator . append ( String : : formatted ( " }}, {} " , list . size ( ) ) ) ;
} ;
2021-10-10 11:41:13 -04:00
auto append_string_index_list = [ & ] ( String name , auto const & keys , auto const & mappings ) {
2021-08-24 19:50:18 -04:00
generator . set ( " name " , name ) ;
2021-08-26 06:56:17 -04:00
generator . set ( " size " , String : : number ( keys . size ( ) ) ) ;
2021-08-24 19:50:18 -04:00
generator . append ( R " ~~~(
2021-10-14 18:31:09 -04:00
static constexpr Array < @ string_index_type @ , @ size @ > @ name @ { {
2021-08-26 06:56:17 -04:00
) ~ ~ ~ " );
2021-08-24 19:50:18 -04:00
2021-10-10 11:41:13 -04:00
constexpr size_t max_values_per_row = 30 ;
2021-08-26 06:56:17 -04:00
size_t values_in_current_row = 0 ;
2021-08-24 19:50:18 -04:00
2021-08-26 06:56:17 -04:00
for ( auto const & key : keys ) {
if ( values_in_current_row + + > 0 )
generator . append ( " " ) ;
if ( auto it = mappings . find ( key ) ; it ! = mappings . end ( ) )
2021-10-10 11:41:13 -04:00
generator . set ( " mapping " sv , String : : number ( it - > value ) ) ;
2021-08-26 06:56:17 -04:00
else
2021-10-10 11:41:13 -04:00
generator . set ( " mapping " sv , " 0 " sv ) ;
2021-08-26 06:56:17 -04:00
generator . append ( " @mapping@, " ) ;
if ( values_in_current_row = = max_values_per_row ) {
values_in_current_row = 0 ;
generator . append ( " \n " ) ;
}
2021-08-24 19:50:18 -04:00
}
generator . append ( R " ~~~(
} } ;
) ~ ~ ~ " );
} ;
2021-09-06 13:56:44 -04:00
auto append_list_patterns = [ & ] ( StringView name , Vector < ListPatterns > const & list_patterns ) {
generator . set ( " name " , name ) ;
generator . set ( " size " , String : : number ( list_patterns . size ( ) ) ) ;
generator . append ( R " ~~~(
static constexpr Array < Patterns , @ size @ > @ name @ { { ) ~ ~ ~ " );
for ( auto const & list_pattern : list_patterns ) {
generator . set ( " type " sv , String : : formatted ( " ListPatternType::{} " , format_identifier ( { } , list_pattern . type ) ) ) ;
generator . set ( " style " sv , String : : formatted ( " ListPatternStyle::{} " , format_identifier ( { } , list_pattern . style ) ) ) ;
2021-10-10 11:58:55 -04:00
generator . set ( " start " sv , String : : number ( list_pattern . start ) ) ;
generator . set ( " middle " sv , String : : number ( list_pattern . middle ) ) ;
generator . set ( " end " sv , String : : number ( list_pattern . end ) ) ;
generator . set ( " pair " sv , String : : number ( list_pattern . pair ) ) ;
2021-09-06 13:56:44 -04:00
generator . append ( R " ~~~(
{ @ type @ , @ style @ , @ start @ , @ middle @ , @ end @ , @ pair @ } , ) ~ ~ ~ " );
}
generator . append ( R " ~~~(
} } ;
) ~ ~ ~ " );
} ;
2021-11-12 08:48:21 -05:00
generate_mapping ( generator , locale_data . locales , s_string_index_type , " s_languages " sv , " s_languages_{} " , [ & ] ( auto const & name , auto const & value ) { append_string_index_list ( name , locale_data . languages , value . languages ) ; } ) ;
generate_mapping ( generator , locale_data . locales , s_string_index_type , " s_territories " sv , " s_territories_{} " , [ & ] ( auto const & name , auto const & value ) { append_string_index_list ( name , locale_data . territories , value . territories ) ; } ) ;
generate_mapping ( generator , locale_data . locales , s_string_index_type , " s_scripts " sv , " s_scripts_{} " , [ & ] ( auto const & name , auto const & value ) { append_string_index_list ( name , locale_data . scripts , value . scripts ) ; } ) ;
LibJS+LibUnicode: Generate all styles of currency localizations
Currently, LibUnicode is only parsing and generating the "long" style of
currency display names. However, the CLDR contains "short" and "narrow"
forms as well that need to be handled. Parse these, and update LibJS to
actually respect the "style" option provided by the user for displaying
currencies with Intl.DisplayNames.
Note: There are some discrepancies between the engines on how style is
handled. In particular, running:
new Intl.DisplayNames('en', {type:'currency', style:'narrow'}).of('usd')
Gives:
SpiderMonkey: "USD"
V8: "US Dollar"
LibJS: "$"
And running:
new Intl.DisplayNames('en', {type:'currency', style:'short'}).of('usd')
Gives:
SpiderMonkey: "$"
V8: "US Dollar"
LibJS: "$"
My best guess is V8 isn't handling style, and just returning the long
form (which is what LibJS did before this commit). And SpiderMonkey can
handle some styles, but if they don't have a value for the requested
style, they fall back to the canonicalized code passed into of().
2021-11-12 13:11:30 -05:00
generate_mapping ( generator , locale_data . locales , s_string_index_type , " s_long_currencies " sv , " s_long_currencies_{} " , [ & ] ( auto const & name , auto const & value ) { append_string_index_list ( name , locale_data . currencies , value . long_currencies ) ; } ) ;
generate_mapping ( generator , locale_data . locales , s_string_index_type , " s_short_currencies " sv , " s_short_currencies_{} " , [ & ] ( auto const & name , auto const & value ) { append_string_index_list ( name , locale_data . currencies , value . short_currencies ) ; } ) ;
generate_mapping ( generator , locale_data . locales , s_string_index_type , " s_narrow_currencies " sv , " s_narrow_currencies_{} " , [ & ] ( auto const & name , auto const & value ) { append_string_index_list ( name , locale_data . currencies , value . narrow_currencies ) ; } ) ;
generate_mapping ( generator , locale_data . locales , s_string_index_type , " s_numeric_currencies " sv , " s_numeric_currencies_{} " , [ & ] ( auto const & name , auto const & value ) { append_string_index_list ( name , locale_data . currencies , value . numeric_currencies ) ; } ) ;
2021-11-12 08:48:21 -05:00
generate_mapping ( generator , locale_data . locales , s_string_index_type , " s_keywords " sv , " s_keywords_{} " , [ & ] ( auto const & name , auto const & value ) { append_string_index_list ( name , locale_data . keywords , value . keywords ) ; } ) ;
generate_mapping ( generator , locale_data . locales , " Patterns " sv , " s_list_patterns " sv , " s_list_patterns_{} " , [ & ] ( auto const & name , auto const & value ) { append_list_patterns ( name , value . list_patterns ) ; } ) ;
2021-08-24 19:50:18 -04:00
2021-08-31 10:03:49 -04:00
generator . append ( R " ~~~(
2021-09-02 17:46:35 -04:00
struct CanonicalLanguageID {
Unicode : : LanguageID to_unicode_language_id ( ) const
{
Unicode : : LanguageID language_id { } ;
language_id . variants . ensure_capacity ( variants_size ) ;
2021-10-10 13:56:33 -04:00
language_id . language = s_string_list [ language ] ;
if ( script ! = 0 )
language_id . script = s_string_list [ script ] ;
if ( region ! = 0 )
language_id . region = s_string_list [ region ] ;
2021-09-02 17:46:35 -04:00
for ( size_t i = 0 ; i < variants_size ; + + i )
2021-10-10 13:56:33 -04:00
language_id . variants . append ( s_string_list [ variants [ i ] ] ) ;
2021-09-02 17:46:35 -04:00
return language_id ;
}
bool matches_variants ( Vector < String > const & other_variants ) const {
if ( variants_size = = 0 )
return true ;
if ( other_variants . size ( ) ! = variants_size )
return false ;
for ( size_t i = 0 ; i < variants_size ; + + i ) {
2021-10-10 13:56:33 -04:00
if ( s_string_list [ variants [ i ] ] ! = other_variants [ i ] )
2021-09-02 17:46:35 -04:00
return false ;
}
return true ;
} ;
2021-10-14 18:31:09 -04:00
@ string_index_type @ language { 0 } ;
@ string_index_type @ script { 0 } ;
@ string_index_type @ region { 0 } ;
Array < @ string_index_type @ , @ variants_size @ > variants { } ;
2021-09-02 17:46:35 -04:00
size_t variants_size { 0 } ;
} ;
2021-08-31 10:03:49 -04:00
struct LanguageMapping {
2021-09-02 17:46:35 -04:00
CanonicalLanguageID key ;
CanonicalLanguageID alias ;
2021-08-31 10:03:49 -04:00
} ;
) ~ ~ ~ " );
2021-09-02 17:46:35 -04:00
auto append_complex_mapping = [ & ] ( StringView name , auto & mappings ) {
generator . set ( " size " , String : : number ( mappings . size ( ) ) ) ;
2021-08-31 10:03:49 -04:00
generator . set ( " name " sv , name ) ;
2021-09-02 17:46:35 -04:00
generator . append ( R " ~~~(
static constexpr Array < LanguageMapping , @ size @ > s_ @ name @ { {
2021-08-31 10:03:49 -04:00
) ~ ~ ~ " );
2021-10-10 13:56:33 -04:00
quick_sort ( mappings , [ & ] ( auto const & lhs , auto const & rhs ) {
2021-11-12 08:48:21 -05:00
auto const & lhs_language = locale_data . unique_strings . get ( lhs . key . language ) ;
auto const & rhs_language = locale_data . unique_strings . get ( rhs . key . language ) ;
2021-09-02 17:46:35 -04:00
2021-08-31 10:03:49 -04:00
// Sort the keys such that "und" language tags are at the end, as those are less specific.
2021-09-02 17:46:35 -04:00
if ( lhs_language . starts_with ( " und " sv ) & & ! rhs_language . starts_with ( " und " sv ) )
2021-08-31 10:03:49 -04:00
return false ;
2021-09-02 17:46:35 -04:00
if ( ! lhs_language . starts_with ( " und " sv ) & & rhs_language . starts_with ( " und " sv ) )
2021-08-31 10:03:49 -04:00
return true ;
2021-09-02 17:46:35 -04:00
return lhs_language < rhs_language ;
2021-08-31 10:03:49 -04:00
} ) ;
2021-09-02 17:46:35 -04:00
for ( auto const & mapping : mappings ) {
2021-10-10 13:56:33 -04:00
generator . set ( " language " sv , String : : number ( mapping . key . language ) ) ;
generator . append ( " { { @language@ " ) ;
2021-08-31 10:03:49 -04:00
2021-10-10 13:56:33 -04:00
append_index ( mapping . key . script ) ;
append_index ( mapping . key . region ) ;
2021-09-02 17:46:35 -04:00
append_list_and_size ( mapping . key . variants ) ;
2021-08-31 10:03:49 -04:00
2021-10-10 13:56:33 -04:00
generator . set ( " language " sv , String : : number ( mapping . alias . language ) ) ;
generator . append ( " }, { @language@ " ) ;
2021-09-02 17:46:35 -04:00
2021-10-10 13:56:33 -04:00
append_index ( mapping . alias . script ) ;
append_index ( mapping . alias . region ) ;
2021-09-02 17:46:35 -04:00
append_list_and_size ( mapping . alias . variants ) ;
generator . append ( " } }, \n " ) ;
}
generator . append ( " } }; \n " ) ;
2021-08-31 10:03:49 -04:00
} ;
append_complex_mapping ( " complex_alias " sv , locale_data . complex_mappings ) ;
2021-08-31 09:40:24 -04:00
append_complex_mapping ( " likely_subtags " sv , locale_data . likely_subtags ) ;
2021-08-31 10:03:49 -04:00
2021-08-26 06:56:17 -04:00
generator . append ( R " ~~~(
2021-09-02 18:21:42 -04:00
static LanguageMapping const * resolve_likely_subtag ( Unicode : : LanguageID const & language_id )
2021-08-31 09:40:24 -04:00
{
// https://unicode.org/reports/tr35/#Likely_Subtags
enum class State {
LanguageScriptRegion ,
LanguageRegion ,
LanguageScript ,
Language ,
UndScript ,
Done ,
} ;
auto state = State : : LanguageScriptRegion ;
while ( state ! = State : : Done ) {
2021-10-10 13:56:33 -04:00
Unicode : : LanguageID search_key ;
2021-08-31 09:40:24 -04:00
switch ( state ) {
case State : : LanguageScriptRegion :
state = State : : LanguageRegion ;
if ( ! language_id . script . has_value ( ) | | ! language_id . region . has_value ( ) )
continue ;
2021-09-02 17:46:35 -04:00
search_key . language = * language_id . language ;
search_key . script = * language_id . script ;
search_key . region = * language_id . region ;
2021-08-31 09:40:24 -04:00
break ;
case State : : LanguageRegion :
state = State : : LanguageScript ;
if ( ! language_id . region . has_value ( ) )
continue ;
2021-09-02 17:46:35 -04:00
search_key . language = * language_id . language ;
search_key . region = * language_id . region ;
2021-08-31 09:40:24 -04:00
break ;
case State : : LanguageScript :
state = State : : Language ;
if ( ! language_id . script . has_value ( ) )
continue ;
2021-09-02 17:46:35 -04:00
search_key . language = * language_id . language ;
search_key . script = * language_id . script ;
2021-08-31 09:40:24 -04:00
break ;
case State : : Language :
state = State : : UndScript ;
2021-09-02 17:46:35 -04:00
search_key . language = * language_id . language ;
2021-08-31 09:40:24 -04:00
break ;
case State : : UndScript :
state = State : : Done ;
if ( ! language_id . script . has_value ( ) )
continue ;
search_key . language = " und " sv ;
2021-09-02 17:46:35 -04:00
search_key . script = * language_id . script ;
2021-08-31 09:40:24 -04:00
break ;
default :
VERIFY_NOT_REACHED ( ) ;
}
2021-09-02 17:46:35 -04:00
for ( auto const & map : s_likely_subtags ) {
2021-10-10 13:56:33 -04:00
auto const & key_language = s_string_list [ map . key . language ] ;
auto const & key_script = s_string_list [ map . key . script ] ;
auto const & key_region = s_string_list [ map . key . region ] ;
if ( key_language ! = search_key . language )
2021-08-31 09:40:24 -04:00
continue ;
2021-10-10 13:56:33 -04:00
if ( ! key_script . is_empty ( ) | | search_key . script . has_value ( ) ) {
if ( key_script ! = search_key . script )
continue ;
}
if ( ! key_region . is_empty ( ) | | search_key . region . has_value ( ) ) {
if ( key_region ! = search_key . region )
continue ;
}
2021-08-31 09:40:24 -04:00
2021-09-02 18:21:42 -04:00
return & map ;
2021-08-31 09:40:24 -04:00
}
}
return nullptr ;
}
2021-08-26 06:56:17 -04:00
) ~ ~ ~ " );
2021-08-24 19:50:18 -04:00
LibJS+LibUnicode: Generate all styles of currency localizations
Currently, LibUnicode is only parsing and generating the "long" style of
currency display names. However, the CLDR contains "short" and "narrow"
forms as well that need to be handled. Parse these, and update LibJS to
actually respect the "style" option provided by the user for displaying
currencies with Intl.DisplayNames.
Note: There are some discrepancies between the engines on how style is
handled. In particular, running:
new Intl.DisplayNames('en', {type:'currency', style:'narrow'}).of('usd')
Gives:
SpiderMonkey: "USD"
V8: "US Dollar"
LibJS: "$"
And running:
new Intl.DisplayNames('en', {type:'currency', style:'short'}).of('usd')
Gives:
SpiderMonkey: "$"
V8: "US Dollar"
LibJS: "$"
My best guess is V8 isn't handling style, and just returning the long
form (which is what LibJS did before this commit). And SpiderMonkey can
handle some styles, but if they don't have a value for the requested
style, they fall back to the canonicalized code passed into of().
2021-11-12 13:11:30 -05:00
auto append_mapping_search = [ & ] ( StringView enum_snake , StringView from_string_name , StringView collection_name ) {
2021-08-26 06:56:17 -04:00
generator . set ( " enum_snake " , enum_snake ) ;
LibJS+LibUnicode: Generate all styles of currency localizations
Currently, LibUnicode is only parsing and generating the "long" style of
currency display names. However, the CLDR contains "short" and "narrow"
forms as well that need to be handled. Parse these, and update LibJS to
actually respect the "style" option provided by the user for displaying
currencies with Intl.DisplayNames.
Note: There are some discrepancies between the engines on how style is
handled. In particular, running:
new Intl.DisplayNames('en', {type:'currency', style:'narrow'}).of('usd')
Gives:
SpiderMonkey: "USD"
V8: "US Dollar"
LibJS: "$"
And running:
new Intl.DisplayNames('en', {type:'currency', style:'short'}).of('usd')
Gives:
SpiderMonkey: "$"
V8: "US Dollar"
LibJS: "$"
My best guess is V8 isn't handling style, and just returning the long
form (which is what LibJS did before this commit). And SpiderMonkey can
handle some styles, but if they don't have a value for the requested
style, they fall back to the canonicalized code passed into of().
2021-11-12 13:11:30 -05:00
generator . set ( " from_string_name " , from_string_name ) ;
2021-08-26 06:56:17 -04:00
generator . set ( " collection_name " , collection_name ) ;
generator . append ( R " ~~~(
Optional < StringView > get_locale_ @ enum_snake @ _mapping ( StringView locale , StringView @ enum_snake @ )
2021-08-24 19:50:18 -04:00
{
2021-08-26 06:56:17 -04:00
auto locale_value = locale_from_string ( locale ) ;
if ( ! locale_value . has_value ( ) )
return { } ;
LibJS+LibUnicode: Generate all styles of currency localizations
Currently, LibUnicode is only parsing and generating the "long" style of
currency display names. However, the CLDR contains "short" and "narrow"
forms as well that need to be handled. Parse these, and update LibJS to
actually respect the "style" option provided by the user for displaying
currencies with Intl.DisplayNames.
Note: There are some discrepancies between the engines on how style is
handled. In particular, running:
new Intl.DisplayNames('en', {type:'currency', style:'narrow'}).of('usd')
Gives:
SpiderMonkey: "USD"
V8: "US Dollar"
LibJS: "$"
And running:
new Intl.DisplayNames('en', {type:'currency', style:'short'}).of('usd')
Gives:
SpiderMonkey: "$"
V8: "US Dollar"
LibJS: "$"
My best guess is V8 isn't handling style, and just returning the long
form (which is what LibJS did before this commit). And SpiderMonkey can
handle some styles, but if they don't have a value for the requested
style, they fall back to the canonicalized code passed into of().
2021-11-12 13:11:30 -05:00
auto @ enum_snake @ _value = @ from_string_name @ _from_string ( @ enum_snake @ ) ;
2021-08-26 06:56:17 -04:00
if ( ! @ enum_snake @ _value . has_value ( ) )
return { } ;
auto locale_index = to_underlying ( * locale_value ) - 1 ; // Subtract 1 because 0 == Locale::None.
auto @ enum_snake @ _index = to_underlying ( * @ enum_snake @ _value ) ;
auto const & mappings = @ collection_name @ . at ( locale_index ) ;
2021-10-10 11:41:13 -04:00
auto @ enum_snake @ _string_index = mappings . at ( @ enum_snake @ _index ) ;
auto @ enum_snake @ _mapping = s_string_list . at ( @ enum_snake @ _string_index ) ;
2021-08-26 06:56:17 -04:00
if ( @ enum_snake @ _mapping . is_empty ( ) )
return { } ;
return @ enum_snake @ _mapping ;
2021-08-24 19:50:18 -04:00
}
) ~ ~ ~ " );
2021-08-26 06:56:17 -04:00
} ;
2021-08-24 19:50:18 -04:00
2021-11-17 08:20:19 -05:00
// Collects a hash -> identifier table for one enumeration and hands it to
// generate_value_from_string() using the "{}_from_string" name pattern.
//
// enum_title: title-case enum name, passed to format_identifier() and on to the generator helper.
// enum_snake: snake-case enum name, forwarded unchanged to the generator helper.
// values:     the enumeration's string values; each one is hashed and mapped to its identifier.
// aliases:    optional extra names; each alias.alias is hashed and mapped to the identifier
//             formatted from the alias string itself.
//
// NOTE(review): the generated code elsewhere in this file calls functions such as
// locale_from_string(); presumably those are what this helper emits - confirm in GeneratorUtil.h.
auto append_from_string = [ & ] ( StringView enum_title , StringView enum_snake , auto const & values , Vector < Alias > const & aliases = { } ) {
2021-10-12 12:22:47 -04:00
HashValueMap < String > hashes ;
// Capacity is reserved for the values only; alias entries are added on top of that.
hashes . ensure_capacity ( values . size ( ) ) ;
2021-08-24 19:50:18 -04:00
2021-10-12 12:22:47 -04:00
// One entry per enumeration value, keyed by the hash of the value string.
for ( auto const & value : values )
hashes . set ( value . hash ( ) , format_identifier ( enum_title , value ) ) ;
2021-11-17 08:20:19 -05:00
// One entry per alias, keyed by the hash of the alias string.
for ( auto const & alias : aliases )
hashes . set ( alias . alias . hash ( ) , format_identifier ( enum_title , alias . alias ) ) ;
2021-08-24 19:50:18 -04:00
2021-10-12 12:22:47 -04:00
// The table is moved into the helper; `hashes` is not used afterwards.
generate_value_from_string ( generator , " {}_from_string " sv , enum_title , enum_snake , move ( hashes ) ) ;
2021-08-24 19:50:18 -04:00
} ;
2021-10-10 14:09:11 -04:00
// Collects a hash -> string-index table for one kind of alias and hands it to
// generate_value_from_string() using the "resolve_{}_alias" name pattern.
//
// enum_snake: snake-case name of the alias kind, forwarded to the generator helper.
// aliases:    container of entries with a `key` (hashed) and a `value` (stored as StringIndexType).
//
// NOTE(review): the trailing "StringView" and "s_string_list[{}]" arguments presumably select the
// generated function's return type and how a stored index is turned into the returned string -
// confirm against generate_value_from_string() in GeneratorUtil.h.
auto append_alias_search = [ & ] ( StringView enum_snake , auto const & aliases ) {
2021-10-14 18:31:09 -04:00
HashValueMap < StringIndexType > hashes ;
2021-10-12 12:22:47 -04:00
hashes . ensure_capacity ( aliases . size ( ) ) ;
2021-08-30 14:56:23 -04:00
2021-10-12 12:22:47 -04:00
// One entry per alias, keyed by the hash of the alias key.
for ( auto const & alias : aliases )
hashes . set ( alias . key . hash ( ) , alias . value ) ;
2021-08-31 10:03:49 -04:00
2021-10-14 18:31:09 -04:00
// The table is moved into the helper; `hashes` is not used afterwards.
generate_value_from_string ( generator , " resolve_{}_alias " sv , s_string_index_type , enum_snake , move ( hashes ) , " StringView " sv , " s_string_list[{}] " sv ) ;
2021-08-30 14:56:23 -04:00
} ;
2021-11-17 08:20:19 -05:00
append_from_string ( " Locale " sv , " locale " sv , locale_data . locales . keys ( ) , locale_data . locale_aliases ) ;
2021-08-26 08:17:01 -04:00
LibJS+LibUnicode: Generate all styles of currency localizations
Currently, LibUnicode is only parsing and generating the "long" style of
currency display names. However, the CLDR contains "short" and "narrow"
forms as well that need to be handled. Parse these, and update LibJS to
actually respect the "style" option provided by the user for displaying
currencies with Intl.DisplayNames.
Note: There are some discrepancies between the engines on how style is
handled. In particular, running:
new Intl.DisplayNames('en', {type:'currency', style:'narrow'}).of('usd')
Gives:
SpiderMonkey: "USD"
V8: "US Dollar"
LibJS: "$"
And running:
new Intl.DisplayNames('en', {type:'currency', style:'short'}).of('usd')
Gives:
SpiderMonkey: "$"
V8: "US Dollar"
LibJS: "$"
My best guess is V8 isn't handling style, and just returning the long
form (which is what LibJS did before this commit). And SpiderMonkey can
handle some styles, but if they don't have a value for the requested
style, they fall back to the canonicalized code passed into of().
2021-11-12 13:11:30 -05:00
append_mapping_search ( " language " sv , " language " sv , " s_languages " sv ) ;
2021-08-24 19:50:18 -04:00
append_from_string ( " Language " sv , " language " sv , locale_data . languages ) ;
2021-08-30 14:56:23 -04:00
append_alias_search ( " language " sv , locale_data . language_aliases ) ;
2021-08-26 06:56:17 -04:00
LibJS+LibUnicode: Generate all styles of currency localizations
Currently, LibUnicode is only parsing and generating the "long" style of
currency display names. However, the CLDR contains "short" and "narrow"
forms as well that need to be handled. Parse these, and update LibJS to
actually respect the "style" option provided by the user for displaying
currencies with Intl.DisplayNames.
Note: There are some discrepancies between the engines on how style is
handled. In particular, running:
new Intl.DisplayNames('en', {type:'currency', style:'narrow'}).of('usd')
Gives:
SpiderMonkey: "USD"
V8: "US Dollar"
LibJS: "$"
And running:
new Intl.DisplayNames('en', {type:'currency', style:'short'}).of('usd')
Gives:
SpiderMonkey: "$"
V8: "US Dollar"
LibJS: "$"
My best guess is V8 isn't handling style, and just returning the long
form (which is what LibJS did before this commit). And SpiderMonkey can
handle some styles, but if they don't have a value for the requested
style, they fall back to the canonicalized code passed into of().
2021-11-12 13:11:30 -05:00
append_mapping_search ( " territory " sv , " territory " sv , " s_territories " sv ) ;
2021-08-24 19:50:18 -04:00
append_from_string ( " Territory " sv , " territory " sv , locale_data . territories ) ;
2021-08-30 14:56:23 -04:00
append_alias_search ( " territory " sv , locale_data . territory_aliases ) ;
2021-08-24 19:50:18 -04:00
LibJS+LibUnicode: Generate all styles of currency localizations
Currently, LibUnicode is only parsing and generating the "long" style of
currency display names. However, the CLDR contains "short" and "narrow"
forms as well that need to be handled. Parse these, and update LibJS to
actually respect the "style" option provided by the user for displaying
currencies with Intl.DisplayNames.
Note: There are some discrepancies between the engines on how style is
handled. In particular, running:
new Intl.DisplayNames('en', {type:'currency', style:'narrow'}).of('usd')
Gives:
SpiderMonkey: "USD"
V8: "US Dollar"
LibJS: "$"
And running:
new Intl.DisplayNames('en', {type:'currency', style:'short'}).of('usd')
Gives:
SpiderMonkey: "$"
V8: "US Dollar"
LibJS: "$"
My best guess is V8 isn't handling style, and just returning the long
form (which is what LibJS did before this commit). And SpiderMonkey can
handle some styles, but if they don't have a value for the requested
style, they fall back to the canonicalized code passed into of().
2021-11-12 13:11:30 -05:00
append_mapping_search ( " script_tag " sv , " script_tag " sv , " s_scripts " sv ) ;
2021-08-26 08:29:39 -04:00
append_from_string ( " ScriptTag " sv , " script_tag " sv , locale_data . scripts ) ;
2021-08-30 14:56:23 -04:00
append_alias_search ( " script_tag " sv , locale_data . script_aliases ) ;
2021-08-26 08:29:39 -04:00
LibJS+LibUnicode: Generate all styles of currency localizations
Currently, LibUnicode is only parsing and generating the "long" style of
currency display names. However, the CLDR contains "short" and "narrow"
forms as well that need to be handled. Parse these, and update LibJS to
actually respect the "style" option provided by the user for displaying
currencies with Intl.DisplayNames.
Note: There are some discrepancies between the engines on how style is
handled. In particular, running:
new Intl.DisplayNames('en', {type:'currency', style:'narrow'}).of('usd')
Gives:
SpiderMonkey: "USD"
V8: "US Dollar"
LibJS: "$"
And running:
new Intl.DisplayNames('en', {type:'currency', style:'short'}).of('usd')
Gives:
SpiderMonkey: "$"
V8: "US Dollar"
LibJS: "$"
My best guess is V8 isn't handling style, and just returning the long
form (which is what LibJS did before this commit). And SpiderMonkey can
handle some styles, but if they don't have a value for the requested
style, they fall back to the canonicalized code passed into of().
2021-11-12 13:11:30 -05:00
append_mapping_search ( " long_currency " sv , " currency " sv , " s_long_currencies " sv ) ;
append_mapping_search ( " short_currency " sv , " currency " sv , " s_short_currencies " sv ) ;
append_mapping_search ( " narrow_currency " sv , " currency " sv , " s_narrow_currencies " sv ) ;
append_mapping_search ( " numeric_currency " sv , " currency " sv , " s_numeric_currencies " sv ) ;
2021-08-26 08:38:54 -04:00
append_from_string ( " Currency " sv , " currency " sv , locale_data . currencies ) ;
LibJS+LibUnicode: Generate all styles of currency localizations
Currently, LibUnicode is only parsing and generating the "long" style of
currency display names. However, the CLDR contains "short" and "narrow"
forms as well that need to be handled. Parse these, and update LibJS to
actually respect the "style" option provided by the user for displaying
currencies with Intl.DisplayNames.
Note: There are some discrepancies between the engines on how style is
handled. In particular, running:
new Intl.DisplayNames('en', {type:'currency', style:'narrow'}).of('usd')
Gives:
SpiderMonkey: "USD"
V8: "US Dollar"
LibJS: "$"
And running:
new Intl.DisplayNames('en', {type:'currency', style:'short'}).of('usd')
Gives:
SpiderMonkey: "$"
V8: "US Dollar"
LibJS: "$"
My best guess is V8 isn't handling style, and just returning the long
form (which is what LibJS did before this commit). And SpiderMonkey can
handle some styles, but if they don't have a value for the requested
style, they fall back to the canonicalized code passed into of().
2021-11-12 13:11:30 -05:00
append_mapping_search ( " key " sv , " key " sv , " s_keywords " sv ) ;
2021-09-10 09:56:11 -04:00
append_from_string ( " Key " sv , " key " sv , locale_data . keywords ) ;
2021-08-30 14:56:23 -04:00
append_alias_search ( " variant " sv , locale_data . variant_aliases ) ;
append_alias_search ( " subdivision " sv , locale_data . subdivision_aliases ) ;
2021-09-06 13:56:44 -04:00
append_from_string ( " ListPatternType " sv , " list_pattern_type " sv , locale_data . list_pattern_types ) ;
append_from_string ( " ListPatternStyle " sv , " list_pattern_style " sv , locale_data . list_pattern_styles ) ;
2021-08-24 19:50:18 -04:00
generator . append ( R " ~~~(
2021-09-06 13:56:44 -04:00
Optional < ListPatterns > get_locale_list_pattern_mapping ( StringView locale , StringView list_pattern_type , StringView list_pattern_style )
{
auto locale_value = locale_from_string ( locale ) ;
if ( ! locale_value . has_value ( ) )
return { } ;
auto type_value = list_pattern_type_from_string ( list_pattern_type ) ;
if ( ! type_value . has_value ( ) )
return { } ;
auto style_value = list_pattern_style_from_string ( list_pattern_style ) ;
if ( ! style_value . has_value ( ) )
return { } ;
auto locale_index = to_underlying ( * locale_value ) - 1 ; // Subtract 1 because 0 == Locale::None.
auto const & locale_list_patterns = s_list_patterns . at ( locale_index ) ;
for ( auto const & list_patterns : locale_list_patterns ) {
2021-10-10 11:58:55 -04:00
if ( ( list_patterns . type = = type_value ) & & ( list_patterns . style = = style_value ) ) {
auto const & start = s_string_list [ list_patterns . start ] ;
auto const & middle = s_string_list [ list_patterns . middle ] ;
auto const & end = s_string_list [ list_patterns . end ] ;
auto const & pair = s_string_list [ list_patterns . pair ] ;
return ListPatterns { start , middle , end , pair } ;
}
2021-09-06 13:56:44 -04:00
}
return { } ;
}
2021-08-31 10:03:49 -04:00
void resolve_complex_language_aliases ( Unicode : : LanguageID & language_id )
{
2021-09-02 17:46:35 -04:00
for ( auto const & map : s_complex_alias ) {
2021-10-10 13:56:33 -04:00
auto const & key_language = s_string_list [ map . key . language ] ;
auto const & key_script = s_string_list [ map . key . script ] ;
auto const & key_region = s_string_list [ map . key . region ] ;
if ( ( key_language ! = language_id . language ) & & ( key_language ! = " und " sv ) )
2021-08-31 10:03:49 -04:00
continue ;
2021-10-10 13:56:33 -04:00
if ( ! key_script . is_empty ( ) & & ( key_script ! = language_id . script ) )
2021-08-31 10:03:49 -04:00
continue ;
2021-10-10 13:56:33 -04:00
if ( ! key_region . is_empty ( ) & & ( key_region ! = language_id . region ) )
2021-08-31 10:03:49 -04:00
continue ;
2021-09-02 17:46:35 -04:00
if ( ! map . key . matches_variants ( language_id . variants ) )
2021-08-31 10:03:49 -04:00
continue ;
2021-09-02 17:46:35 -04:00
auto alias = map . alias . to_unicode_language_id ( ) ;
2021-08-31 10:03:49 -04:00
if ( alias . language = = " und " sv )
alias . language = move ( language_id . language ) ;
2021-10-10 13:56:33 -04:00
if ( key_script . is_empty ( ) & & ! alias . script . has_value ( ) )
2021-08-31 10:03:49 -04:00
alias . script = move ( language_id . script ) ;
2021-10-10 13:56:33 -04:00
if ( key_region . is_empty ( ) & & ! alias . region . has_value ( ) )
2021-08-31 10:03:49 -04:00
alias . region = move ( language_id . region ) ;
2021-09-02 17:46:35 -04:00
if ( map . key . variants_size = = 0 & & alias . variants . is_empty ( ) )
2021-08-31 10:03:49 -04:00
alias . variants = move ( language_id . variants ) ;
language_id = move ( alias ) ;
break ;
}
}
2021-09-02 18:21:42 -04:00
Optional < Unicode : : LanguageID > add_likely_subtags ( Unicode : : LanguageID const & language_id )
{
// https://www.unicode.org/reports/tr35/#Likely_Subtags
auto const * likely_subtag = resolve_likely_subtag ( language_id ) ;
if ( likely_subtag = = nullptr )
return { } ;
auto maximized = language_id ;
2021-10-10 13:56:33 -04:00
auto const & key_script = s_string_list [ likely_subtag - > key . script ] ;
auto const & key_region = s_string_list [ likely_subtag - > key . region ] ;
auto const & alias_language = s_string_list [ likely_subtag - > alias . language ] ;
auto const & alias_script = s_string_list [ likely_subtag - > alias . script ] ;
auto const & alias_region = s_string_list [ likely_subtag - > alias . region ] ;
2021-09-02 18:21:42 -04:00
if ( maximized . language = = " und " sv )
2021-10-10 13:56:33 -04:00
maximized . language = alias_language ;
if ( ! maximized . script . has_value ( ) | | ( ! key_script . is_empty ( ) & & ! alias_script . is_empty ( ) ) )
maximized . script = alias_script ;
if ( ! maximized . region . has_value ( ) | | ( ! key_region . is_empty ( ) & & ! alias_region . is_empty ( ) ) )
maximized . region = alias_region ;
2021-09-02 18:21:42 -04:00
return maximized ;
}
2021-08-31 09:40:24 -04:00
Optional < String > resolve_most_likely_territory ( Unicode : : LanguageID const & language_id )
{
if ( auto const * likely_subtag = resolve_likely_subtag ( language_id ) ; likely_subtag ! = nullptr )
2021-10-10 13:56:33 -04:00
return s_string_list [ likely_subtag - > alias . region ] ;
2021-08-31 09:40:24 -04:00
return { } ;
}
2021-08-24 19:50:18 -04:00
}
) ~ ~ ~ " );
2021-11-23 10:51:10 -05:00
VERIFY ( file . write ( generator . as_string_view ( ) ) ) ;
2021-08-24 19:50:18 -04:00
}
2021-11-23 10:51:10 -05:00
// Program entry point for the Unicode locale generator.
//
// Reads seven path options from the command line, opens the two output files, parses the
// CLDR data into a UnicodeLocaleData, and then writes the generated header and implementation.
//
// Returns 0 on success. Errors from opening either output file or from parse_all_locales()
// are propagated to the caller via TRY.
ErrorOr < int > serenity_main ( Main : : Arguments arguments )
2021-08-24 19:50:18 -04:00
{
2021-11-28 10:39:55 -05:00
// Output files to generate.
StringView generated_header_path ;
StringView generated_implementation_path ;
// Input CLDR directories.
StringView core_path ;
StringView locale_names_path ;
StringView misc_path ;
StringView numbers_path ;
StringView dates_path ;
2021-08-24 19:50:18 -04:00
Core : : ArgsParser args_parser ;
args_parser . add_option ( generated_header_path , " Path to the Unicode locale header file to generate " , " generated-header-path " , ' h ' , " generated-header-path " ) ;
args_parser . add_option ( generated_implementation_path , " Path to the Unicode locale implementation file to generate " , " generated-implementation-path " , ' c ' , " generated-implementation-path " ) ;
2021-08-30 08:56:28 -04:00
args_parser . add_option ( core_path , " Path to cldr-core directory " , " core-path " , ' r ' , " core-path " ) ;
2021-08-24 19:50:18 -04:00
args_parser . add_option ( locale_names_path , " Path to cldr-localenames directory " , " locale-names-path " , ' l ' , " locale-names-path " ) ;
2021-09-05 22:46:03 -04:00
args_parser . add_option ( misc_path , " Path to cldr-misc directory " , " misc-path " , ' m ' , " misc-path " ) ;
2021-08-26 08:31:31 -04:00
args_parser . add_option ( numbers_path , " Path to cldr-numbers directory " , " numbers-path " , ' n ' , " numbers-path " ) ;
2021-11-28 10:39:55 -05:00
args_parser . add_option ( dates_path , " Path to cldr-dates directory " , " dates-path " , ' d ' , " dates-path " ) ;
2021-11-23 10:51:10 -05:00
args_parser . parse ( arguments ) ;
2021-08-24 19:50:18 -04:00
2021-11-23 10:51:10 -05:00
// Opens `path`, or prints the usage text and returns an error when the path is empty.
// Within this function it is only applied to the two output paths; the CLDR input paths
// are handed to parse_all_locales() without an emptiness check here.
auto open_file = [ & ] ( StringView path ) - > ErrorOr < NonnullRefPtr < Core : : File > > {
2021-08-24 19:50:18 -04:00
if ( path . is_empty ( ) ) {
2021-11-23 10:51:10 -05:00
args_parser . print_usage ( stderr , arguments . argv [ 0 ] ) ;
return Error : : from_string_literal ( " Must provide all command line options " sv ) ;
2021-08-24 19:50:18 -04:00
}
2021-11-23 10:51:10 -05:00
// NOTE(review): the file is opened ReadWrite; confirm whether a pre-existing, longer
// output file is truncated before the generated text is written.
return Core : : File : : open ( path , Core : : OpenMode : : ReadWrite ) ;
2021-08-24 19:50:18 -04:00
} ;
2021-11-23 10:51:10 -05:00
// Both output files are opened before any CLDR data is parsed.
auto generated_header_file = TRY ( open_file ( generated_header_path ) ) ;
auto generated_implementation_file = TRY ( open_file ( generated_implementation_path ) ) ;
2021-08-24 19:50:18 -04:00
UnicodeLocaleData locale_data ;
2021-11-28 10:39:55 -05:00
// Fills `locale_data` from the five CLDR directories.
TRY ( parse_all_locales ( core_path , locale_names_path , misc_path , numbers_path , dates_path , locale_data ) ) ;
2021-08-24 19:50:18 -04:00
generate_unicode_locale_header ( generated_header_file , locale_data ) ;
generate_unicode_locale_implementation ( generated_implementation_file , locale_data ) ;
return 0 ;
}