2021-08-24 19:50:18 -04:00
/*
* Copyright ( c ) 2021 , Tim Flynn < trflynn89 @ pm . me >
*
* SPDX - License - Identifier : BSD - 2 - Clause
*/
# include <AK/AllOf.h>
# include <AK/CharacterTypes.h>
# include <AK/Format.h>
# include <AK/HashMap.h>
# include <AK/JsonObject.h>
# include <AK/JsonParser.h>
# include <AK/JsonValue.h>
# include <AK/LexicalPath.h>
# include <AK/QuickSort.h>
# include <AK/SourceGenerator.h>
# include <AK/String.h>
# include <AK/StringBuilder.h>
# include <LibCore/ArgsParser.h>
# include <LibCore/DirIterator.h>
# include <LibCore/File.h>
2021-09-02 17:46:35 -04:00
# include <LibUnicode/Locale.h>
2021-08-24 19:50:18 -04:00
2021-09-06 13:56:44 -04:00
// One set of list-formatting patterns belonging to a locale.
// Instances are created in parse_locale_list_patterns() using positional brace
// initialization, so the member order below must not change.
struct ListPatterns {
// Normalized pattern type, as returned by the list_pattern_type helper
// in parse_locale_list_patterns().
String type ;
// Normalized pattern style, as returned by the list_pattern_style helper
// in parse_locale_list_patterns().
String style ;
// Pattern strings copied verbatim from the JSON members of the same name.
String start ;
String middle ;
String end ;
// Pattern string for a two-element list (read from the JSON member keyed "2").
String pair ;
} ;
2021-08-24 19:50:18 -04:00
// Everything this generator collects for one locale.
// Instances live in UnicodeLocaleData::locales and are filled in by the parse_* functions below.
struct Locale {
// Values of the "language" / "territory" / "variant" members of the locale's
// identity object (see parse_identity()). Territory and variant are only set
// when the JSON value is a string.
String language ;
Optional < String > territory ;
Optional < String > variant ;
2021-08-26 08:17:01 -04:00
// Language identifier -> display string, from parse_locale_languages().
HashMap < String , String > languages ;
2021-08-24 19:50:18 -04:00
// Territory identifier -> display string, from parse_locale_territories().
HashMap < String , String > territories ;
2021-08-26 08:29:39 -04:00
// Script identifier -> display string, from parse_locale_scripts().
HashMap < String , String > scripts ;
2021-08-26 08:38:54 -04:00
// Currency identifier -> "displayName" value, from parse_locale_currencies().
HashMap < String , String > currencies ;
2021-09-10 09:56:11 -04:00
// Keyword key -> comma-joined list of values, from parse_numeric_keywords().
HashMap < String , String > keywords ;
2021-09-06 13:56:44 -04:00
// One entry per member of the locale's list-patterns object.
Vector < ListPatterns > list_patterns ;
2021-08-24 19:50:18 -04:00
} ;
2021-09-02 17:46:35 -04:00
// The components of a language identifier as split apart by parse_language().
// Components that were not present in the parsed identifier keep their
// default (empty) value.
struct CanonicalLanguageID {
String language { } ;
String script { } ;
String region { } ;
// Variant subtags in the order they appeared in the identifier.
Vector < String > variants { } ;
} ;
// A parsed (key, alias) pair of language identifiers, as produced by parse_language_mapping().
// parse_language_mapping() initializes this positionally (key first, alias second),
// so the member order must not change.
struct LanguageMapping {
CanonicalLanguageID key { } ;
CanonicalLanguageID alias { } ;
} ;
2021-08-24 19:50:18 -04:00
// Aggregate of all data gathered while parsing; this is the input to the
// generate_unicode_locale_* functions.
struct UnicodeLocaleData {
// Per-locale data, keyed by the locale name with variants removed (see parse_all_locales()).
HashMap < String , Locale > locales ;
// The following vectors hold each distinct identifier seen across all locales.
// The parsers only append a value after a contains_slow() check, so entries are unique.
// generate_unicode_locale_header() sorts them in place when emitting the enums.
Vector < String > languages ;
Vector < String > territories ;
2021-08-26 08:29:39 -04:00
Vector < String > scripts ;
2021-08-24 19:50:18 -04:00
Vector < String > variants ;
2021-08-26 08:38:54 -04:00
Vector < String > currencies ;
2021-09-10 09:56:11 -04:00
Vector < String > keywords ;
2021-09-06 13:56:44 -04:00
Vector < String > list_pattern_types ;
Vector < String > list_pattern_styles ;
2021-08-30 14:56:23 -04:00
// Simple alias -> replacement maps filled by parse_core_aliases().
// Only alias keys that do not contain the '-' separator end up in these maps.
HashMap < String , String > language_aliases ;
HashMap < String , String > territory_aliases ;
HashMap < String , String > script_aliases ;
HashMap < String , String > variant_aliases ;
HashMap < String , String > subdivision_aliases ;
2021-09-02 17:46:35 -04:00
// Alias entries whose key contains the separator, parsed into structured form by parse_core_aliases().
Vector < LanguageMapping > complex_mappings ;
// Structured form of every entry parsed by parse_likely_subtags().
Vector < LanguageMapping > likely_subtags ;
// Largest number of variants found on either side of any stored LanguageMapping.
size_t max_variant_size { 0 } ;
2021-08-24 19:50:18 -04:00
} ;
static void write_to_file_if_different ( Core : : File & file , StringView contents )
{
auto const current_contents = file . read_all ( ) ;
if ( StringView { current_contents . bytes ( ) } = = contents )
return ;
VERIFY ( file . seek ( 0 ) ) ;
VERIFY ( file . truncate ( 0 ) ) ;
VERIFY ( file . write ( contents ) ) ;
}
2021-09-02 17:46:35 -04:00
// Splits a '-'-separated language identifier of the shape
//     language [-script] [-region] [-variant ...]
// into its components.
//
// Returns an empty Optional when the first segment is not a language subtag,
// or when any segment left over after the optional script and region is not a
// variant subtag. Splitting must yield at least one segment (VERIFY'd).
static Optional<CanonicalLanguageID> parse_language(StringView language)
{
    auto const subtags = language.split_view('-');
    VERIFY(!subtags.is_empty());

    // The leading subtag is mandatory and has to be a language.
    if (!Unicode::is_unicode_language_subtag(subtags[0]))
        return {};

    CanonicalLanguageID result {};
    result.language = subtags[0];

    size_t position = 1;
    if (position == subtags.size())
        return result;

    // Script and region are each optional, and are only accepted in this order.
    if (Unicode::is_unicode_script_subtag(subtags[position])) {
        result.script = subtags[position];
        ++position;
        if (position == subtags.size())
            return result;
    }

    if (Unicode::is_unicode_region_subtag(subtags[position])) {
        result.region = subtags[position];
        ++position;
        if (position == subtags.size())
            return result;
    }

    // Everything that remains must be a variant.
    for (; position < subtags.size(); ++position) {
        if (!Unicode::is_unicode_variant_subtag(subtags[position]))
            return {};
        result.variants.append(subtags[position]);
    }

    return result;
}
// Runs parse_language() on both sides of a key -> alias pair.
// Yields a LanguageMapping only when both sides parse; otherwise yields an
// empty Optional. The alias is not parsed at all if the key fails to parse.
static Optional<LanguageMapping> parse_language_mapping(StringView key, StringView alias)
{
    auto key_id = parse_language(key);
    if (key_id.has_value()) {
        auto alias_id = parse_language(alias);
        if (alias_id.has_value())
            return LanguageMapping { key_id.release_value(), alias_id.release_value() };
    }

    return {};
}
2021-08-30 14:56:23 -04:00
// Reads the aliases JSON file found in `core_supplemental_path` and stores its
// language, territory, script, variant and subdivision aliases in `locale_data`.
//
// The file must exist, be openable and be valid JSON; each of these is VERIFY'd.
// The alias tables are looked up under supplemental -> metadata -> alias.
static void parse_core_aliases ( String core_supplemental_path , UnicodeLocaleData & locale_data )
{
LexicalPath core_aliases_path ( move ( core_supplemental_path ) ) ;
core_aliases_path = core_aliases_path . append ( " aliases.json " sv ) ;
VERIFY ( Core : : File : : exists ( core_aliases_path . string ( ) ) ) ;
auto core_aliases_file_or_error = Core : : File : : open ( core_aliases_path . string ( ) , Core : : OpenMode : : ReadOnly ) ;
VERIFY ( ! core_aliases_file_or_error . is_error ( ) ) ;
auto core_aliases = JsonParser ( core_aliases_file_or_error . value ( ) - > read_all ( ) ) . parse ( ) ;
VERIFY ( core_aliases . has_value ( ) ) ;
auto const & supplemental_object = core_aliases - > as_object ( ) . get ( " supplemental " sv ) ;
auto const & metadata_object = supplemental_object . as_object ( ) . get ( " metadata " sv ) ;
auto const & alias_object = metadata_object . as_object ( ) . get ( " alias " sv ) ;
2021-09-02 17:46:35 -04:00
// Walks one alias table. Each member's replacement string is read from its
// "_replacement" entry. Note that the lambda's `alias_object` parameter
// shadows the outer variable of the same name.
auto append_aliases = [ & ] ( auto & alias_object , auto & alias_map ) {
2021-08-30 14:56:23 -04:00
alias_object . as_object ( ) . for_each_member ( [ & ] ( auto const & key , JsonValue const & value ) {
auto alias = value . as_object ( ) . get ( " _replacement " sv ) . as_string ( ) ;
2021-08-31 10:03:49 -04:00
2021-09-02 17:46:35 -04:00
// Keys containing the subtag separator are multi-part identifiers. They are
// parsed into a LanguageMapping and kept in complex_mappings (regardless of
// which alias table they came from) instead of the simple string map.
// Entries that fail to parse are silently skipped.
if ( key . contains ( ' - ' ) ) {
auto mapping = parse_language_mapping ( key , alias ) ;
if ( ! mapping . has_value ( ) )
return ;
2021-08-31 09:40:24 -04:00
2021-09-02 17:46:35 -04:00
// Track the largest variant count seen on either side of a mapping.
locale_data . max_variant_size = max ( mapping - > key . variants . size ( ) , locale_data . max_variant_size ) ;
locale_data . max_variant_size = max ( mapping - > alias . variants . size ( ) , locale_data . max_variant_size ) ;
locale_data . complex_mappings . append ( mapping . release_value ( ) ) ;
} else {
2021-08-31 10:03:49 -04:00
alias_map . set ( key , move ( alias ) ) ;
2021-09-02 17:46:35 -04:00
}
2021-08-30 14:56:23 -04:00
} ) ;
} ;
append_aliases ( alias_object . as_object ( ) . get ( " languageAlias " sv ) , locale_data . language_aliases ) ;
2021-09-02 17:46:35 -04:00
append_aliases ( alias_object . as_object ( ) . get ( " territoryAlias " sv ) , locale_data . territory_aliases ) ;
2021-08-30 14:56:23 -04:00
append_aliases ( alias_object . as_object ( ) . get ( " scriptAlias " sv ) , locale_data . script_aliases ) ;
append_aliases ( alias_object . as_object ( ) . get ( " variantAlias " sv ) , locale_data . variant_aliases ) ;
append_aliases ( alias_object . as_object ( ) . get ( " subdivisionAlias " sv ) , locale_data . subdivision_aliases ) ;
}
2021-08-31 09:40:24 -04:00
// Reads the likely-subtags JSON file found in `core_supplemental_path` and appends
// every entry that parses as a pair of language identifiers to locale_data.likely_subtags.
//
// The file must exist, be openable and be valid JSON; each of these is VERIFY'd.
// Entries are looked up under supplemental -> likelySubtags.
static void parse_likely_subtags ( String core_supplemental_path , UnicodeLocaleData & locale_data )
{
LexicalPath likely_subtags_path ( move ( core_supplemental_path ) ) ;
likely_subtags_path = likely_subtags_path . append ( " likelySubtags.json " sv ) ;
VERIFY ( Core : : File : : exists ( likely_subtags_path . string ( ) ) ) ;
auto likely_subtags_file_or_error = Core : : File : : open ( likely_subtags_path . string ( ) , Core : : OpenMode : : ReadOnly ) ;
VERIFY ( ! likely_subtags_file_or_error . is_error ( ) ) ;
auto likely_subtags = JsonParser ( likely_subtags_file_or_error . value ( ) - > read_all ( ) ) . parse ( ) ;
VERIFY ( likely_subtags . has_value ( ) ) ;
auto const & supplemental_object = likely_subtags - > as_object ( ) . get ( " supplemental " sv ) ;
auto const & likely_subtags_object = supplemental_object . as_object ( ) . get ( " likelySubtags " sv ) ;
likely_subtags_object . as_object ( ) . for_each_member ( [ & ] ( auto const & key , JsonValue const & value ) {
2021-09-02 17:46:35 -04:00
// Entries whose key or value cannot be parsed by parse_language() are silently skipped.
auto mapping = parse_language_mapping ( key , value . as_string ( ) ) ;
if ( ! mapping . has_value ( ) )
return ;
// Track the largest variant count seen on either side of a mapping.
locale_data . max_variant_size = max ( mapping - > key . variants . size ( ) , locale_data . max_variant_size ) ;
locale_data . max_variant_size = max ( mapping - > alias . variants . size ( ) , locale_data . max_variant_size ) ;
locale_data . likely_subtags . append ( mapping . release_value ( ) ) ;
2021-08-31 09:40:24 -04:00
} ) ;
}
2021-08-24 19:50:18 -04:00
// Reads the identity object (main -> <locale directory name> -> identity) from the
// languages JSON file in `locale_path` and stores its language, territory and
// variant on `locale`. Each value is also added to the matching list in
// `locale_data` if it is not already there.
//
// The file must exist, be openable and be valid JSON; each of these is VERIFY'd.
static void parse_identity ( String locale_path , UnicodeLocaleData & locale_data , Locale & locale )
{
LexicalPath languages_path ( move ( locale_path ) ) ; // Note: Every JSON file defines identity data, so we can use any of them.
languages_path = languages_path . append ( " languages.json " sv ) ;
VERIFY ( Core : : File : : exists ( languages_path . string ( ) ) ) ;
auto languages_file_or_error = Core : : File : : open ( languages_path . string ( ) , Core : : OpenMode : : ReadOnly ) ;
VERIFY ( ! languages_file_or_error . is_error ( ) ) ;
auto languages = JsonParser ( languages_file_or_error . value ( ) - > read_all ( ) ) . parse ( ) ;
VERIFY ( languages . has_value ( ) ) ;
auto const & main_object = languages - > as_object ( ) . get ( " main " sv ) ;
// The object under "main" is keyed by the name of the directory containing the file.
auto const & locale_object = main_object . as_object ( ) . get ( languages_path . parent ( ) . basename ( ) ) ;
auto const & identity_object = locale_object . as_object ( ) . get ( " identity " sv ) ;
auto const & language_string = identity_object . as_object ( ) . get ( " language " sv ) ;
auto const & territory_string = identity_object . as_object ( ) . get ( " territory " sv ) ;
auto const & variant_string = identity_object . as_object ( ) . get ( " variant " sv ) ;
// The language is read unconditionally; territory and variant are optional
// and only used when their JSON value is a string.
locale . language = language_string . as_string ( ) ;
if ( ! locale_data . languages . contains_slow ( locale . language ) )
locale_data . languages . append ( locale . language ) ;
if ( territory_string . is_string ( ) ) {
locale . territory = territory_string . as_string ( ) ;
if ( ! locale_data . territories . contains_slow ( * locale . territory ) )
locale_data . territories . append ( * locale . territory ) ;
}
if ( variant_string . is_string ( ) ) {
locale . variant = variant_string . as_string ( ) ;
if ( ! locale_data . variants . contains_slow ( * locale . variant ) )
locale_data . variants . append ( * locale . variant ) ;
}
}
2021-08-26 08:17:01 -04:00
// Reads the languages JSON file in `locale_path` and copies every member of
// main -> <locale directory name> -> localeDisplayNames -> languages into
// locale.languages (member name -> string value).
//
// The file must exist, be openable and be valid JSON; each of these is VERIFY'd.
static void parse_locale_languages ( String locale_path , Locale & locale )
{
LexicalPath languages_path ( move ( locale_path ) ) ;
languages_path = languages_path . append ( " languages.json " sv ) ;
VERIFY ( Core : : File : : exists ( languages_path . string ( ) ) ) ;
auto languages_file_or_error = Core : : File : : open ( languages_path . string ( ) , Core : : OpenMode : : ReadOnly ) ;
VERIFY ( ! languages_file_or_error . is_error ( ) ) ;
auto languages = JsonParser ( languages_file_or_error . value ( ) - > read_all ( ) ) . parse ( ) ;
VERIFY ( languages . has_value ( ) ) ;
auto const & main_object = languages - > as_object ( ) . get ( " main " sv ) ;
// The object under "main" is keyed by the name of the directory containing the file.
auto const & locale_object = main_object . as_object ( ) . get ( languages_path . parent ( ) . basename ( ) ) ;
auto const & locale_display_names_object = locale_object . as_object ( ) . get ( " localeDisplayNames " sv ) ;
auto const & languages_object = locale_display_names_object . as_object ( ) . get ( " languages " sv ) ;
languages_object . as_object ( ) . for_each_member ( [ & ] ( auto const & key , JsonValue const & value ) {
locale . languages . set ( key , value . as_string ( ) ) ;
} ) ;
}
2021-08-24 19:50:18 -04:00
// Reads the territories JSON file in `locale_path` and copies every member of
// main -> <locale directory name> -> localeDisplayNames -> territories into
// locale.territories (member name -> string value).
//
// The file must exist, be openable and be valid JSON; each of these is VERIFY'd.
static void parse_locale_territories ( String locale_path , Locale & locale )
{
LexicalPath territories_path ( move ( locale_path ) ) ;
territories_path = territories_path . append ( " territories.json " sv ) ;
VERIFY ( Core : : File : : exists ( territories_path . string ( ) ) ) ;
auto territories_file_or_error = Core : : File : : open ( territories_path . string ( ) , Core : : OpenMode : : ReadOnly ) ;
VERIFY ( ! territories_file_or_error . is_error ( ) ) ;
auto territories = JsonParser ( territories_file_or_error . value ( ) - > read_all ( ) ) . parse ( ) ;
VERIFY ( territories . has_value ( ) ) ;
auto const & main_object = territories - > as_object ( ) . get ( " main " sv ) ;
// The object under "main" is keyed by the name of the directory containing the file.
auto const & locale_object = main_object . as_object ( ) . get ( territories_path . parent ( ) . basename ( ) ) ;
auto const & locale_display_names_object = locale_object . as_object ( ) . get ( " localeDisplayNames " sv ) ;
auto const & territories_object = locale_display_names_object . as_object ( ) . get ( " territories " sv ) ;
territories_object . as_object ( ) . for_each_member ( [ & ] ( auto const & key , JsonValue const & value ) {
locale . territories . set ( key , value . as_string ( ) ) ;
} ) ;
}
2021-08-26 08:29:39 -04:00
// Reads the scripts JSON file in `locale_path` and copies every member of
// main -> <locale directory name> -> localeDisplayNames -> scripts into
// locale.scripts (member name -> string value). Each member name is also added
// to locale_data.scripts if it is not already there.
//
// The file must exist, be openable and be valid JSON; each of these is VERIFY'd.
static void parse_locale_scripts ( String locale_path , UnicodeLocaleData & locale_data , Locale & locale )
{
LexicalPath scripts_path ( move ( locale_path ) ) ;
scripts_path = scripts_path . append ( " scripts.json " sv ) ;
VERIFY ( Core : : File : : exists ( scripts_path . string ( ) ) ) ;
auto scripts_file_or_error = Core : : File : : open ( scripts_path . string ( ) , Core : : OpenMode : : ReadOnly ) ;
VERIFY ( ! scripts_file_or_error . is_error ( ) ) ;
auto scripts = JsonParser ( scripts_file_or_error . value ( ) - > read_all ( ) ) . parse ( ) ;
VERIFY ( scripts . has_value ( ) ) ;
auto const & main_object = scripts - > as_object ( ) . get ( " main " sv ) ;
// The object under "main" is keyed by the name of the directory containing the file.
auto const & locale_object = main_object . as_object ( ) . get ( scripts_path . parent ( ) . basename ( ) ) ;
auto const & locale_display_names_object = locale_object . as_object ( ) . get ( " localeDisplayNames " sv ) ;
auto const & scripts_object = locale_display_names_object . as_object ( ) . get ( " scripts " sv ) ;
scripts_object . as_object ( ) . for_each_member ( [ & ] ( auto const & key , JsonValue const & value ) {
locale . scripts . set ( key , value . as_string ( ) ) ;
if ( ! locale_data . scripts . contains_slow ( key ) )
locale_data . scripts . append ( key ) ;
} ) ;
}
2021-09-08 15:22:49 -04:00
// Reads the list-patterns JSON file in `misc_path` and appends one ListPatterns
// entry to locale.list_patterns for every member of
// main -> <locale directory name> -> listPatterns.
// The normalized type and style of each entry are also added to
// locale_data.list_pattern_types / list_pattern_styles if not already present.
//
// The file must exist, be openable and be valid JSON; each of these is VERIFY'd.
static void parse_locale_list_patterns ( String misc_path , UnicodeLocaleData & locale_data , Locale & locale )
2021-09-06 13:56:44 -04:00
{
LexicalPath list_patterns_path ( move ( misc_path ) ) ;
list_patterns_path = list_patterns_path . append ( " listPatterns.json " sv ) ;
VERIFY ( Core : : File : : exists ( list_patterns_path . string ( ) ) ) ;
auto list_patterns_file_or_error = Core : : File : : open ( list_patterns_path . string ( ) , Core : : OpenMode : : ReadOnly ) ;
VERIFY ( ! list_patterns_file_or_error . is_error ( ) ) ;
auto list_patterns = JsonParser ( list_patterns_file_or_error . value ( ) - > read_all ( ) ) . parse ( ) ;
VERIFY ( list_patterns . has_value ( ) ) ;
auto const & main_object = list_patterns - > as_object ( ) . get ( " main " sv ) ;
// The object under "main" is keyed by the name of the directory containing the file.
auto const & locale_object = main_object . as_object ( ) . get ( list_patterns_path . parent ( ) . basename ( ) ) ;
auto const & list_patterns_object = locale_object . as_object ( ) . get ( " listPatterns " sv ) ;
// Maps a JSON member name onto the normalized type name by substring match.
// A member name matching none of the known types is treated as a fatal error.
auto list_pattern_type = [ ] ( StringView key ) {
if ( key . contains ( " type-standard " sv ) )
return " conjunction " sv ;
if ( key . contains ( " type-or " sv ) )
return " disjunction " sv ;
if ( key . contains ( " type-unit " sv ) )
return " unit " sv ;
VERIFY_NOT_REACHED ( ) ;
} ;
// Maps a JSON member name onto the normalized style name by substring match.
// Names mentioning neither of the two explicit styles fall back to the long style.
auto list_pattern_style = [ ] ( StringView key ) {
if ( key . contains ( " short " sv ) )
return " short " sv ;
if ( key . contains ( " narrow " sv ) )
return " narrow " sv ;
return " long " sv ;
} ;
list_patterns_object . as_object ( ) . for_each_member ( [ & ] ( auto const & key , JsonValue const & value ) {
auto type = list_pattern_type ( key ) ;
auto style = list_pattern_style ( key ) ;
auto start = value . as_object ( ) . get ( " start " sv ) . as_string ( ) ;
auto middle = value . as_object ( ) . get ( " middle " sv ) . as_string ( ) ;
auto end = value . as_object ( ) . get ( " end " sv ) . as_string ( ) ;
auto pair = value . as_object ( ) . get ( " 2 " sv ) . as_string ( ) ;
if ( ! locale_data . list_pattern_types . contains_slow ( type ) )
locale_data . list_pattern_types . append ( type ) ;
if ( ! locale_data . list_pattern_styles . contains_slow ( style ) )
locale_data . list_pattern_styles . append ( style ) ;
// Positional initialization: the argument order must match the member order of ListPatterns.
locale . list_patterns . append ( { move ( type ) , move ( style ) , move ( start ) , move ( middle ) , move ( end ) , move ( pair ) } ) ;
} ) ;
}
2021-08-26 08:38:54 -04:00
// Reads the currencies JSON file in `numbers_path` and, for every member of
// main -> <locale directory name> -> numbers -> currencies, stores the member's
// "displayName" string in locale.currencies under the member name.
// Each member name is also added to locale_data.currencies if not already present.
//
// The file must exist, be openable and be valid JSON; each of these is VERIFY'd.
static void parse_locale_currencies ( String numbers_path , UnicodeLocaleData & locale_data , Locale & locale )
2021-08-24 19:50:18 -04:00
{
2021-08-26 08:38:54 -04:00
LexicalPath currencies_path ( move ( numbers_path ) ) ;
currencies_path = currencies_path . append ( " currencies.json " sv ) ;
VERIFY ( Core : : File : : exists ( currencies_path . string ( ) ) ) ;
auto currencies_file_or_error = Core : : File : : open ( currencies_path . string ( ) , Core : : OpenMode : : ReadOnly ) ;
VERIFY ( ! currencies_file_or_error . is_error ( ) ) ;
auto currencies = JsonParser ( currencies_file_or_error . value ( ) - > read_all ( ) ) . parse ( ) ;
VERIFY ( currencies . has_value ( ) ) ;
auto const & main_object = currencies - > as_object ( ) . get ( " main " sv ) ;
// The object under "main" is keyed by the name of the directory containing the file.
auto const & locale_object = main_object . as_object ( ) . get ( currencies_path . parent ( ) . basename ( ) ) ;
auto const & locale_numbers_object = locale_object . as_object ( ) . get ( " numbers " sv ) ;
auto const & currencies_object = locale_numbers_object . as_object ( ) . get ( " currencies " sv ) ;
currencies_object . as_object ( ) . for_each_member ( [ & ] ( auto const & key , JsonValue const & value ) {
auto const & display_name = value . as_object ( ) . get ( " displayName " sv ) ;
locale . currencies . set ( key , display_name . as_string ( ) ) ;
if ( ! locale_data . currencies . contains_slow ( key ) )
locale_data . currencies . append ( key ) ;
} ) ;
}
2021-09-10 09:56:11 -04:00
// Reads the numbers JSON file in `locale_numbers_path` and records the locale's
// numbering systems as the value of a single keyword (the constant `key` below).
//
// The stored value is a comma-joined list: the default numbering system first,
// followed by each "other" numbering system that is not already in the list.
// The keyword is also added to locale_data.keywords if not already present.
//
// The file must exist, be openable and be valid JSON; each of these is VERIFY'd.
static void parse_numeric_keywords ( String locale_numbers_path , UnicodeLocaleData & locale_data , Locale & locale )
{
static constexpr StringView key = " nu " sv ;
LexicalPath numbers_path ( move ( locale_numbers_path ) ) ;
numbers_path = numbers_path . append ( " numbers.json " sv ) ;
VERIFY ( Core : : File : : exists ( numbers_path . string ( ) ) ) ;
auto numbers_file_or_error = Core : : File : : open ( numbers_path . string ( ) , Core : : OpenMode : : ReadOnly ) ;
VERIFY ( ! numbers_file_or_error . is_error ( ) ) ;
auto numbers = JsonParser ( numbers_file_or_error . value ( ) - > read_all ( ) ) . parse ( ) ;
VERIFY ( numbers . has_value ( ) ) ;
auto const & main_object = numbers - > as_object ( ) . get ( " main " sv ) ;
// The object under "main" is keyed by the name of the directory containing the file.
auto const & locale_object = main_object . as_object ( ) . get ( numbers_path . parent ( ) . basename ( ) ) ;
auto const & locale_numbers_object = locale_object . as_object ( ) . get ( " numbers " sv ) ;
auto const & default_numbering_system_object = locale_numbers_object . as_object ( ) . get ( " defaultNumberingSystem " sv ) ;
auto const & other_numbering_systems_object = locale_numbers_object . as_object ( ) . get ( " otherNumberingSystems " sv ) ;
Vector < String > keyword_values { } ;
// The default system always goes first; the others follow, skipping duplicates.
keyword_values . append ( default_numbering_system_object . as_string ( ) ) ;
other_numbering_systems_object . as_object ( ) . for_each_member ( [ & ] ( auto const & , JsonValue const & value ) {
auto keyword_value = value . as_string ( ) ;
if ( ! keyword_values . contains_slow ( keyword_value ) )
keyword_values . append ( move ( keyword_value ) ) ;
} ) ;
StringBuilder builder ;
builder . join ( ' , ' , keyword_values ) ;
locale . keywords . set ( key , builder . build ( ) ) ;
if ( ! locale_data . keywords . contains_slow ( key ) )
locale_data . keywords . append ( key ) ;
}
2021-08-26 08:38:54 -04:00
// Returns a directory iterator over the "main" subdirectory of `path`,
// constructed with the SkipParentAndBaseDir flag.
// The subdirectory must exist (VERIFY'd). If the iterator reports an error, the
// error is printed with warnln() and the program hits VERIFY_NOT_REACHED().
static Core : : DirIterator path_to_dir_iterator ( String path )
{
LexicalPath lexical_path ( move ( path ) ) ;
lexical_path = lexical_path . append ( " main " sv ) ;
VERIFY ( Core : : File : : is_directory ( lexical_path . string ( ) ) ) ;
2021-08-24 19:50:18 -04:00
2021-08-26 08:38:54 -04:00
Core : : DirIterator iterator ( lexical_path . string ( ) , Core : : DirIterator : : SkipParentAndBaseDir ) ;
2021-08-24 19:50:18 -04:00
if ( iterator . has_error ( ) ) {
2021-08-26 08:38:54 -04:00
warnln ( " {}: {} " , lexical_path . string ( ) , iterator . error_string ( ) ) ;
2021-08-24 19:50:18 -04:00
VERIFY_NOT_REACHED ( ) ;
}
2021-08-26 08:38:54 -04:00
return iterator ;
}
2021-09-06 13:56:44 -04:00
// Top-level parsing entry point: fills `locale_data` from the four data directories.
//
// - `core_path`: its "supplemental" subdirectory provides the alias and likely-subtag data.
// - `locale_names_path`: per-locale identity, language, territory and script data.
// - `misc_path`: per-locale list patterns.
// - `numbers_path`: per-locale currencies and numeric keywords.
//
// For the last three, every entry of the directory's "main" subdirectory (see
// path_to_dir_iterator()) is expected to be a locale directory (VERIFY'd).
static void parse_all_locales ( String core_path , String locale_names_path , String misc_path , String numbers_path , UnicodeLocaleData & locale_data )
2021-08-26 08:38:54 -04:00
{
auto locale_names_iterator = path_to_dir_iterator ( move ( locale_names_path ) ) ;
2021-09-06 13:56:44 -04:00
auto misc_iterator = path_to_dir_iterator ( move ( misc_path ) ) ;
2021-08-26 08:38:54 -04:00
auto numbers_iterator = path_to_dir_iterator ( move ( numbers_path ) ) ;
2021-08-30 14:56:23 -04:00
LexicalPath core_supplemental_path ( move ( core_path ) ) ;
core_supplemental_path = core_supplemental_path . append ( " supplemental " sv ) ;
VERIFY ( Core : : File : : is_directory ( core_supplemental_path . string ( ) ) ) ;
parse_core_aliases ( core_supplemental_path . string ( ) , locale_data ) ;
2021-08-31 09:40:24 -04:00
parse_likely_subtags ( core_supplemental_path . string ( ) , locale_data ) ;
2021-08-30 14:56:23 -04:00
2021-09-06 15:33:56 -04:00
// Derives the key used in locale_data.locales from a locale directory path:
// the directory name is parsed as a language identifier and rebuilt from its
// language, script and region only, so any variants are dropped.
// Yields an empty Optional when the directory name does not parse.
auto remove_variants_from_path = [ ] ( String path ) - > Optional < String > {
auto parsed_locale = parse_language ( LexicalPath : : basename ( path ) ) ;
if ( ! parsed_locale . has_value ( ) )
return { } ;
StringBuilder builder ;
builder . append ( parsed_locale - > language ) ;
if ( ! parsed_locale - > script . is_empty ( ) )
builder . appendff ( " -{} " , parsed_locale - > script ) ;
if ( ! parsed_locale - > region . is_empty ( ) )
builder . appendff ( " -{} " , parsed_locale - > region ) ;
return builder . build ( ) ;
} ;
2021-08-26 08:38:54 -04:00
// In each loop below, directories whose name cannot be parsed are skipped,
// and directories that map to the same variant-less key share one Locale entry.
while ( locale_names_iterator . has_next ( ) ) {
auto locale_path = locale_names_iterator . next_full_path ( ) ;
2021-08-24 19:50:18 -04:00
VERIFY ( Core : : File : : is_directory ( locale_path ) ) ;
2021-09-06 15:33:56 -04:00
auto language = remove_variants_from_path ( locale_path ) ;
if ( ! language . has_value ( ) )
continue ;
auto & locale = locale_data . locales . ensure ( * language ) ;
2021-08-24 19:50:18 -04:00
parse_identity ( locale_path , locale_data , locale ) ;
2021-08-26 08:17:01 -04:00
parse_locale_languages ( locale_path , locale ) ;
2021-08-24 19:50:18 -04:00
parse_locale_territories ( locale_path , locale ) ;
2021-08-26 08:29:39 -04:00
parse_locale_scripts ( locale_path , locale_data , locale ) ;
2021-08-24 19:50:18 -04:00
}
2021-08-26 08:38:54 -04:00
2021-09-06 13:56:44 -04:00
while ( misc_iterator . has_next ( ) ) {
auto misc_path = misc_iterator . next_full_path ( ) ;
VERIFY ( Core : : File : : is_directory ( misc_path ) ) ;
2021-09-06 15:33:56 -04:00
auto language = remove_variants_from_path ( misc_path ) ;
if ( ! language . has_value ( ) )
continue ;
auto & locale = locale_data . locales . ensure ( * language ) ;
2021-09-08 15:22:49 -04:00
parse_locale_list_patterns ( misc_path , locale_data , locale ) ;
2021-09-06 13:56:44 -04:00
}
2021-08-26 08:38:54 -04:00
while ( numbers_iterator . has_next ( ) ) {
auto numbers_path = numbers_iterator . next_full_path ( ) ;
VERIFY ( Core : : File : : is_directory ( numbers_path ) ) ;
2021-09-06 15:33:56 -04:00
auto language = remove_variants_from_path ( numbers_path ) ;
if ( ! language . has_value ( ) )
continue ;
auto & locale = locale_data . locales . ensure ( * language ) ;
2021-08-26 08:38:54 -04:00
parse_locale_currencies ( numbers_path , locale_data , locale ) ;
2021-09-10 09:56:11 -04:00
parse_numeric_keywords ( numbers_path , locale_data , locale ) ;
2021-08-26 08:38:54 -04:00
}
2021-08-24 19:50:18 -04:00
}
2021-08-26 06:42:11 -04:00
// Turns a data identifier into text usable as a generated C++ enumerator name.
//
// Separator characters in `identifier` are first replaced with underscores.
// If the result consists only of ASCII digits, it is prefixed with the first
// character of `owner` and an underscore. Otherwise the identifier is returned
// in title case.
//
// NOTE(review): the digits-only branch reads owner[0], and some callers pass an
// empty `owner`. That looks safe only as long as those callers never pass a
// digits-only (or empty) identifier - confirm.
static String format_identifier ( StringView owner , String identifier )
2021-08-24 19:50:18 -04:00
{
2021-08-26 06:42:11 -04:00
identifier . replace ( " - " sv , " _ " sv , true ) ;
2021-08-24 19:50:18 -04:00
if ( all_of ( identifier , is_ascii_digit ) )
return String : : formatted ( " {}_{} " , owner [ 0 ] , identifier ) ;
2021-08-26 06:42:11 -04:00
return identifier . to_titlecase ( ) ;
2021-08-24 19:50:18 -04:00
}
// Generates the C++ header text (enum definitions plus declarations of the
// lookup functions in the Detail namespace) and writes it to `file` via
// write_to_file_if_different(), so an unchanged header is not rewritten.
//
// `locale_data` is taken by non-const reference because the identifier lists
// in it are sorted in place while the enums are generated.
static void generate_unicode_locale_header ( Core : : File & file , UnicodeLocaleData & locale_data )
{
StringBuilder builder ;
SourceGenerator generator { builder } ;
2021-08-26 06:42:11 -04:00
// Emits one `enum class` named `name` with one enumerator per entry of `values`
// (formatted with format_identifier()). `values` is sorted in place first.
// When `default_` is non-empty it is emitted as the first enumerator.
auto generate_enum = [ & ] ( StringView name , StringView default_ , Vector < String > & values ) {
2021-08-24 19:50:18 -04:00
quick_sort ( values ) ;
generator . set ( " name " , name ) ;
2021-08-26 06:42:11 -04:00
// Pick the underlying type from the enumerator count, including the optional default entry.
generator . set ( " underlying " , ( ( values . size ( ) + ! default_ . is_empty ( ) ) < 256 ) ? " u8 " sv : " u16 " sv ) ;
2021-08-24 19:50:18 -04:00
generator . append ( R " ~~~(
2021-08-26 06:42:11 -04:00
enum class @ name @ : @ underlying @ { ) ~ ~ ~ " );
if ( ! default_ . is_empty ( ) ) {
generator . set ( " default " , default_ ) ;
generator . append ( R " ~~~(
@ default @ , ) ~ ~ ~ " );
}
2021-08-24 19:50:18 -04:00
for ( auto const & value : values ) {
generator . set ( " value " , format_identifier ( name , value ) ) ;
generator . append ( R " ~~~(
@ value @ , ) ~ ~ ~ " );
}
generator . append ( R " ~~~(
} ;
) ~ ~ ~ " );
} ;
generator . append ( R " ~~~(
# pragma once
# include <AK/Optional.h>
2021-08-26 06:56:17 -04:00
# include <AK/StringView.h>
2021-08-24 19:50:18 -04:00
# include <AK/Types.h>
# include <LibUnicode/Forward.h>
namespace Unicode {
) ~ ~ ~ " );
2021-08-26 06:42:11 -04:00
// The locale names are the keys of the locales map; they are copied into a
// local vector so generate_enum() has something it can sort.
// Only the Locale enum gets a default enumerator.
auto locales = locale_data . locales . keys ( ) ;
generate_enum ( " Locale " sv , " None " sv , locales ) ;
generate_enum ( " Language " sv , { } , locale_data . languages ) ;
generate_enum ( " Territory " sv , { } , locale_data . territories ) ;
2021-08-26 08:29:39 -04:00
generate_enum ( " ScriptTag " sv , { } , locale_data . scripts ) ;
2021-08-26 08:38:54 -04:00
generate_enum ( " Currency " sv , { } , locale_data . currencies ) ;
2021-09-10 09:56:11 -04:00
generate_enum ( " Key " sv , { } , locale_data . keywords ) ;
2021-08-26 06:42:11 -04:00
generate_enum ( " Variant " sv , { } , locale_data . variants ) ;
2021-09-06 13:56:44 -04:00
generate_enum ( " ListPatternType " sv , { } , locale_data . list_pattern_types ) ;
generate_enum ( " ListPatternStyle " sv , { } , locale_data . list_pattern_styles ) ;
2021-08-24 19:50:18 -04:00
generator . append ( R " ~~~(
namespace Detail {
2021-08-26 06:42:11 -04:00
Optional < Locale > locale_from_string ( StringView const & locale ) ;
2021-08-26 08:17:01 -04:00
Optional < StringView > get_locale_language_mapping ( StringView locale , StringView language ) ;
2021-08-24 19:50:18 -04:00
Optional < Language > language_from_string ( StringView const & language ) ;
2021-08-30 14:56:23 -04:00
Optional < StringView > resolve_language_alias ( StringView const & language ) ;
2021-08-26 06:56:17 -04:00
Optional < StringView > get_locale_territory_mapping ( StringView locale , StringView territory ) ;
2021-08-24 19:50:18 -04:00
Optional < Territory > territory_from_string ( StringView const & territory ) ;
2021-08-30 14:56:23 -04:00
Optional < StringView > resolve_territory_alias ( StringView const & territory ) ;
2021-08-24 19:50:18 -04:00
2021-08-26 08:29:39 -04:00
Optional < StringView > get_locale_script_tag_mapping ( StringView locale , StringView script_tag ) ;
Optional < ScriptTag > script_tag_from_string ( StringView const & script_tag ) ;
2021-08-30 14:56:23 -04:00
Optional < StringView > resolve_script_tag_alias ( StringView const & script_tag ) ;
2021-08-26 08:29:39 -04:00
2021-08-26 08:38:54 -04:00
Optional < StringView > get_locale_currency_mapping ( StringView locale , StringView currency ) ;
Optional < Currency > currency_from_string ( StringView const & currency ) ;
2021-09-10 09:56:11 -04:00
Optional < StringView > get_locale_key_mapping ( StringView locale , StringView key ) ;
Optional < Key > key_from_string ( StringView const & key ) ;
2021-09-06 13:56:44 -04:00
Optional < ListPatterns > get_locale_list_pattern_mapping ( StringView locale , StringView list_pattern_type , StringView list_pattern_style ) ;
Optional < ListPatternType > list_pattern_type_from_string ( StringView const & list_pattern_type ) ;
Optional < ListPatternStyle > list_pattern_style_from_string ( StringView const & list_pattern_style ) ;
2021-08-30 14:56:23 -04:00
Optional < StringView > resolve_variant_alias ( StringView const & variant ) ;
Optional < StringView > resolve_subdivision_alias ( StringView const & subdivision ) ;
2021-08-31 10:03:49 -04:00
void resolve_complex_language_aliases ( Unicode : : LanguageID & language_id ) ;
2021-09-02 18:21:42 -04:00
Optional < Unicode : : LanguageID > add_likely_subtags ( Unicode : : LanguageID const & language_id ) ;
2021-08-31 09:40:24 -04:00
Optional < String > resolve_most_likely_territory ( Unicode : : LanguageID const & language_id ) ;
2021-08-31 10:03:49 -04:00
2021-08-24 19:50:18 -04:00
}
}
) ~ ~ ~ " );
write_to_file_if_different ( file , generator . as_string_view ( ) ) ;
}
static void generate_unicode_locale_implementation ( Core : : File & file , UnicodeLocaleData & locale_data )
{
StringBuilder builder ;
SourceGenerator generator { builder } ;
generator . set ( " locales_size " sv , String : : number ( locale_data . locales . size ( ) ) ) ;
generator . set ( " territories_size " , String : : number ( locale_data . territories . size ( ) ) ) ;
2021-09-02 17:46:35 -04:00
generator . set ( " variants_size " , String : : number ( locale_data . max_variant_size ) ) ;
2021-08-24 19:50:18 -04:00
generator . append ( R " ~~~(
# include <AK/Array.h>
2021-08-26 06:56:17 -04:00
# include <AK/HashMap.h>
# include <AK/Span.h>
2021-08-31 10:03:49 -04:00
# include <LibUnicode/Locale.h>
2021-08-24 19:50:18 -04:00
# include <LibUnicode/UnicodeLocale.h>
namespace Unicode {
2021-09-06 13:56:44 -04:00
struct Patterns {
ListPatternType type ;
ListPatternStyle style ;
StringView start ;
StringView middle ;
StringView end ;
StringView pair ;
} ;
2021-08-24 19:50:18 -04:00
) ~ ~ ~ " );
auto format_mapping_name = [ ] ( StringView format , StringView name ) {
auto mapping_name = name . to_lowercase_string ( ) ;
mapping_name . replace ( " - " sv , " _ " sv , true ) ;
return String : : formatted ( format , mapping_name ) ;
} ;
2021-09-02 17:46:35 -04:00
auto append_string = [ & ] ( StringView value ) {
if ( value . is_empty ( ) )
generator . append ( " , {} " sv ) ;
else
generator . append ( String : : formatted ( " , \" {} \" sv " , value ) ) ;
} ;
auto append_list_and_size = [ & ] ( auto const & list ) {
if ( list . is_empty ( ) ) {
generator . append ( " , {}, 0 " ) ;
return ;
}
bool first = true ;
generator . append ( " , { " ) ;
for ( auto const & item : list ) {
generator . append ( first ? " " : " , " ) ;
generator . append ( String : : formatted ( " \" {} \" sv " , item ) ) ;
first = false ;
}
generator . append ( String : : formatted ( " }}, {} " , list . size ( ) ) ) ;
} ;
2021-09-06 13:56:44 -04:00
auto append_string_list = [ & ] ( String name , auto const & keys , auto const & mappings ) {
2021-08-24 19:50:18 -04:00
generator . set ( " name " , name ) ;
2021-08-26 06:56:17 -04:00
generator . set ( " size " , String : : number ( keys . size ( ) ) ) ;
2021-08-24 19:50:18 -04:00
generator . append ( R " ~~~(
2021-08-26 06:56:17 -04:00
static constexpr Array < StringView , @ size @ > @ name @ { {
) ~ ~ ~ " );
2021-08-24 19:50:18 -04:00
2021-08-26 06:56:17 -04:00
constexpr size_t max_values_per_row = 10 ;
size_t values_in_current_row = 0 ;
2021-08-24 19:50:18 -04:00
2021-08-26 06:56:17 -04:00
for ( auto const & key : keys ) {
if ( values_in_current_row + + > 0 )
generator . append ( " " ) ;
if ( auto it = mappings . find ( key ) ; it ! = mappings . end ( ) )
generator . set ( " mapping " sv , String : : formatted ( " \" {} \" sv " , it - > value ) ) ;
else
generator . set ( " mapping " sv , " {} " sv ) ;
generator . append ( " @mapping@, " ) ;
if ( values_in_current_row = = max_values_per_row ) {
values_in_current_row = 0 ;
generator . append ( " \n " ) ;
}
2021-08-24 19:50:18 -04:00
}
generator . append ( R " ~~~(
} } ;
) ~ ~ ~ " );
} ;
2021-09-06 13:56:44 -04:00
// Emits a static constexpr Array of Patterns called `name` with one braced
// entry per element of `list_patterns`, in order. Each entry carries the
// type, style, start, middle, end and pair fields of the element.
auto append_list_patterns = [ & ] ( StringView name , Vector < ListPatterns > const & list_patterns ) {
generator . set ( " name " , name ) ;
generator . set ( " size " , String : : number ( list_patterns . size ( ) ) ) ;
generator . append ( R " ~~~(
static constexpr Array < Patterns , @ size @ > @ name @ { { ) ~ ~ ~ " );
for ( auto const & list_pattern : list_patterns ) {
// type/style become qualified enumerator references; format_identifier is
// defined outside this view and presumably yields a valid identifier - confirm.
generator . set ( " type " sv , String : : formatted ( " ListPatternType::{} " , format_identifier ( { } , list_pattern . type ) ) ) ;
generator . set ( " style " sv , String : : formatted ( " ListPatternStyle::{} " , format_identifier ( { } , list_pattern . style ) ) ) ;
// The four pattern strings are emitted as quoted string-view literals, unescaped.
generator . set ( " start " sv , String : : formatted ( " \" {} \" sv " , list_pattern . start ) ) ;
generator . set ( " middle " sv , String : : formatted ( " \" {} \" sv " , list_pattern . middle ) ) ;
generator . set ( " end " sv , String : : formatted ( " \" {} \" sv " , list_pattern . end ) ) ;
generator . set ( " pair " sv , String : : formatted ( " \" {} \" sv " , list_pattern . pair ) ) ;
generator . append ( R " ~~~(
{ @ type @ , @ style @ , @ start @ , @ middle @ , @ end @ , @ pair @ } , ) ~ ~ ~ " );
}
// Close the array initializer opened above.
generator . append ( R " ~~~(
} } ;
) ~ ~ ~ " );
} ;
// Emits one per-locale array for every entry of locale_data.locales and then a
// top-level Array of Spans, called `name`, that references each of them.
//   type                 - element type named inside the generated Span.
//   name                 - identifier of the generated top-level array.
//   format               - format string handed to format_mapping_name together
//                          with the locale key to build each per-locale name.
//   format_list_callback - invoked as (per-locale name, locale value); the call
//                          sites in this file use it to emit the per-locale array.
auto append_mapping = [ & ] ( StringView type , StringView name , StringView format , auto format_list_callback ) {
2021-08-26 06:56:17 -04:00
Vector < String > mapping_names ;
2021-08-24 19:50:18 -04:00
2021-08-26 06:56:17 -04:00
for ( auto const & locale : locale_data . locales ) {
auto mapping_name = format_mapping_name ( format , locale . key ) ;
2021-09-06 13:56:44 -04:00
// The callback runs before the top-level array is written, so the
// per-locale arrays precede the array of spans in the output.
format_list_callback ( mapping_name , locale . value ) ;
2021-08-26 06:56:17 -04:00
mapping_names . append ( move ( mapping_name ) ) ;
}
// The spans are emitted in sorted-name order.
// NOTE(review): code generated later in this function indexes these arrays by
// locale enum value minus one; presumably the enum is generated in the same
// order - confirm against the header generator.
quick_sort ( mapping_names ) ;
2021-09-06 13:56:44 -04:00
generator . set ( " type " , type ) ;
2021-08-26 06:56:17 -04:00
generator . set ( " name " , name ) ;
generator . set ( " size " , String : : number ( locale_data . locales . size ( ) ) ) ;
generator . append ( R " ~~~(
2021-09-06 13:56:44 -04:00
static constexpr Array < Span < @ type @ const > , @ size @ > @ name @ { {
2021-08-26 06:56:17 -04:00
) ~ ~ ~ " );
2021-08-24 19:50:18 -04:00
2021-08-26 06:56:17 -04:00
// Line wrapping of the generated initializer: a newline is appended after this many entries.
constexpr size_t max_values_per_row = 10 ;
size_t values_in_current_row = 0 ;
2021-08-24 19:50:18 -04:00
2021-08-26 06:56:17 -04:00
for ( auto & mapping_name : mapping_names ) {
// Separator before every entry except the first one of a row.
if ( values_in_current_row + + > 0 )
generator . append ( " " ) ;
2021-08-24 19:50:18 -04:00
2021-08-26 06:56:17 -04:00
// The "name" key is reused for the per-locale array name; the header
// using the top-level name has already been appended at this point.
generator . set ( " name " , move ( mapping_name ) ) ;
generator . append ( " @name@.span(), " ) ;
2021-08-24 19:50:18 -04:00
2021-08-26 06:56:17 -04:00
if ( values_in_current_row = = max_values_per_row ) {
values_in_current_row = 0 ;
generator . append ( " \n " ) ;
}
}
2021-08-24 19:50:18 -04:00
2021-08-26 06:56:17 -04:00
// Close the array initializer opened above.
generator . append ( R " ~~~(
} } ;
) ~ ~ ~ " );
} ;
2021-08-24 19:50:18 -04:00
2021-09-06 13:56:44 -04:00
append_mapping ( " StringView " sv , " s_languages " sv , " s_languages_{} " , [ & ] ( auto const & name , auto const & value ) { append_string_list ( name , locale_data . languages , value . languages ) ; } ) ;
append_mapping ( " StringView " sv , " s_territories " sv , " s_territories_{} " , [ & ] ( auto const & name , auto const & value ) { append_string_list ( name , locale_data . territories , value . territories ) ; } ) ;
append_mapping ( " StringView " sv , " s_scripts " sv , " s_scripts_{} " , [ & ] ( auto const & name , auto const & value ) { append_string_list ( name , locale_data . scripts , value . scripts ) ; } ) ;
append_mapping ( " StringView " sv , " s_currencies " sv , " s_currencies_{} " , [ & ] ( auto const & name , auto const & value ) { append_string_list ( name , locale_data . currencies , value . currencies ) ; } ) ;
2021-09-10 09:56:11 -04:00
append_mapping ( " StringView " sv , " s_keywords " sv , " s_keywords_{} " , [ & ] ( auto const & name , auto const & value ) { append_string_list ( name , locale_data . keywords , value . keywords ) ; } ) ;
2021-09-06 13:56:44 -04:00
append_mapping ( " Patterns " sv , " s_list_patterns " sv , " s_list_patterns_{} " , [ & ] ( auto const & name , auto const & value ) { append_list_patterns ( name , value . list_patterns ) ; } ) ;
2021-08-24 19:50:18 -04:00
2021-08-31 10:03:49 -04:00
generator . append ( R " ~~~(
2021-09-02 17:46:35 -04:00
struct CanonicalLanguageID {
Unicode : : LanguageID to_unicode_language_id ( ) const
{
Unicode : : LanguageID language_id { } ;
language_id . variants . ensure_capacity ( variants_size ) ;
language_id . language = language . to_string ( ) ;
if ( ! script . is_empty ( ) )
language_id . script = script . to_string ( ) ;
if ( ! region . is_empty ( ) )
language_id . region = region . to_string ( ) ;
for ( size_t i = 0 ; i < variants_size ; + + i )
language_id . variants . append ( variants [ i ] . to_string ( ) ) ;
return language_id ;
}
bool matches_variants ( Vector < String > const & other_variants ) const {
if ( variants_size = = 0 )
return true ;
if ( other_variants . size ( ) ! = variants_size )
return false ;
for ( size_t i = 0 ; i < variants_size ; + + i ) {
if ( variants [ i ] ! = other_variants [ i ] )
return false ;
}
return true ;
} ;
StringView language { } ;
StringView script { } ;
StringView region { } ;
Array < StringView , @ variants_size @ > variants { } ;
size_t variants_size { 0 } ;
} ;
2021-08-31 10:03:49 -04:00
struct LanguageMapping {
2021-09-02 17:46:35 -04:00
CanonicalLanguageID key ;
CanonicalLanguageID alias ;
2021-08-31 10:03:49 -04:00
} ;
) ~ ~ ~ " );
2021-09-02 17:46:35 -04:00
// Emits a static constexpr Array of LanguageMapping whose identifier is `name`
// prefixed with "s_", holding one { key, alias } entry per element of `mappings`.
// `mappings` is taken by mutable reference and is sorted in place before emission.
auto append_complex_mapping = [ & ] ( StringView name , auto & mappings ) {
generator . set ( " size " , String : : number ( mappings . size ( ) ) ) ;
2021-08-31 10:03:49 -04:00
generator . set ( " name " sv , name ) ;
2021-09-02 17:46:35 -04:00
generator . append ( R " ~~~(
static constexpr Array < LanguageMapping , @ size @ > s_ @ name @ { {
2021-08-31 10:03:49 -04:00
) ~ ~ ~ " );
2021-09-02 17:46:35 -04:00
// Only the key language takes part in the ordering; script, region and
// variants are not compared.
quick_sort ( mappings , [ ] ( auto const & lhs , auto const & rhs ) {
auto const & lhs_language = lhs . key . language ;
auto const & rhs_language = rhs . key . language ;
2021-08-31 10:03:49 -04:00
// Sort the keys such that "und" language tags are at the end, as those are less specific.
2021-09-02 17:46:35 -04:00
if ( lhs_language . starts_with ( " und " sv ) & & ! rhs_language . starts_with ( " und " sv ) )
2021-08-31 10:03:49 -04:00
return false ;
2021-09-02 17:46:35 -04:00
if ( ! lhs_language . starts_with ( " und " sv ) & & rhs_language . starts_with ( " und " sv ) )
2021-08-31 10:03:49 -04:00
return true ;
2021-09-02 17:46:35 -04:00
return lhs_language < rhs_language ;
2021-08-31 10:03:49 -04:00
} ) ;
2021-09-02 17:46:35 -04:00
for ( auto const & mapping : mappings ) {
// First half of the entry: the key's language, script, region, variants and variant count.
generator . set ( " language " sv , mapping . key . language ) ;
generator . append ( " { { \" @language@ \" sv " ) ;
2021-08-31 10:03:49 -04:00
2021-09-02 17:46:35 -04:00
append_string ( mapping . key . script ) ;
append_string ( mapping . key . region ) ;
append_list_and_size ( mapping . key . variants ) ;
2021-08-31 10:03:49 -04:00
2021-09-02 17:46:35 -04:00
// Second half of the entry: the alias, laid out in the same field order as the key.
generator . set ( " language " sv , mapping . alias . language ) ;
generator . append ( " }, { \" @language@ \" sv " ) ;
append_string ( mapping . alias . script ) ;
append_string ( mapping . alias . region ) ;
append_list_and_size ( mapping . alias . variants ) ;
generator . append ( " } }, \n " ) ;
}
// Close the array initializer opened above.
generator . append ( " } }; \n " ) ;
2021-08-31 10:03:49 -04:00
} ;
append_complex_mapping ( " complex_alias " sv , locale_data . complex_mappings ) ;
2021-08-31 09:40:24 -04:00
append_complex_mapping ( " likely_subtags " sv , locale_data . likely_subtags ) ;
2021-08-31 10:03:49 -04:00
2021-08-26 06:56:17 -04:00
generator . append ( R " ~~~(
2021-09-02 18:21:42 -04:00
static LanguageMapping const * resolve_likely_subtag ( Unicode : : LanguageID const & language_id )
2021-08-31 09:40:24 -04:00
{
// https://unicode.org/reports/tr35/#Likely_Subtags
enum class State {
LanguageScriptRegion ,
LanguageRegion ,
LanguageScript ,
Language ,
UndScript ,
Done ,
} ;
auto state = State : : LanguageScriptRegion ;
while ( state ! = State : : Done ) {
2021-09-02 17:46:35 -04:00
CanonicalLanguageID search_key ;
2021-08-31 09:40:24 -04:00
switch ( state ) {
case State : : LanguageScriptRegion :
state = State : : LanguageRegion ;
if ( ! language_id . script . has_value ( ) | | ! language_id . region . has_value ( ) )
continue ;
2021-09-02 17:46:35 -04:00
search_key . language = * language_id . language ;
search_key . script = * language_id . script ;
search_key . region = * language_id . region ;
2021-08-31 09:40:24 -04:00
break ;
case State : : LanguageRegion :
state = State : : LanguageScript ;
if ( ! language_id . region . has_value ( ) )
continue ;
2021-09-02 17:46:35 -04:00
search_key . language = * language_id . language ;
search_key . region = * language_id . region ;
2021-08-31 09:40:24 -04:00
break ;
case State : : LanguageScript :
state = State : : Language ;
if ( ! language_id . script . has_value ( ) )
continue ;
2021-09-02 17:46:35 -04:00
search_key . language = * language_id . language ;
search_key . script = * language_id . script ;
2021-08-31 09:40:24 -04:00
break ;
case State : : Language :
state = State : : UndScript ;
2021-09-02 17:46:35 -04:00
search_key . language = * language_id . language ;
2021-08-31 09:40:24 -04:00
break ;
case State : : UndScript :
state = State : : Done ;
if ( ! language_id . script . has_value ( ) )
continue ;
search_key . language = " und " sv ;
2021-09-02 17:46:35 -04:00
search_key . script = * language_id . script ;
2021-08-31 09:40:24 -04:00
break ;
default :
VERIFY_NOT_REACHED ( ) ;
}
2021-09-02 17:46:35 -04:00
for ( auto const & map : s_likely_subtags ) {
2021-08-31 09:40:24 -04:00
if ( map . key . language ! = search_key . language )
continue ;
if ( map . key . script ! = search_key . script )
continue ;
if ( map . key . region ! = search_key . region )
continue ;
2021-09-02 18:21:42 -04:00
return & map ;
2021-08-31 09:40:24 -04:00
}
}
return nullptr ;
}
2021-08-24 19:50:18 -04:00
namespace Detail {
2021-08-26 06:56:17 -04:00
) ~ ~ ~ " );
2021-08-24 19:50:18 -04:00
2021-08-26 06:56:17 -04:00
// Emits a lookup function named get_locale_<enum_snake>_mapping taking a locale
// string and an <enum_snake> string. The generated function converts both
// strings to enum values, indexes `collection_name` by locale (minus one) and
// then by the <enum_snake> value, and returns the stored string, or an empty
// Optional when either conversion fails or the stored string is empty.
// `enum_title` is stored in the generator, but the template below does not reference it.
auto append_mapping_search = [ & ] ( StringView enum_title , StringView enum_snake , StringView collection_name ) {
generator . set ( " enum_title " , enum_title ) ;
generator . set ( " enum_snake " , enum_snake ) ;
generator . set ( " collection_name " , collection_name ) ;
generator . append ( R " ~~~(
Optional < StringView > get_locale_ @ enum_snake @ _mapping ( StringView locale , StringView @ enum_snake @ )
2021-08-24 19:50:18 -04:00
{
2021-08-26 06:56:17 -04:00
auto locale_value = locale_from_string ( locale ) ;
if ( ! locale_value . has_value ( ) )
return { } ;
auto @ enum_snake @ _value = @ enum_snake @ _from_string ( @ enum_snake @ ) ;
if ( ! @ enum_snake @ _value . has_value ( ) )
return { } ;
auto locale_index = to_underlying ( * locale_value ) - 1 ; // Subtract 1 because 0 == Locale::None.
auto @ enum_snake @ _index = to_underlying ( * @ enum_snake @ _value ) ;
auto const & mappings = @ collection_name @ . at ( locale_index ) ;
auto @ enum_snake @ _mapping = mappings . at ( @ enum_snake @ _index ) ;
if ( @ enum_snake @ _mapping . is_empty ( ) )
return { } ;
return @ enum_snake @ _mapping ;
2021-08-24 19:50:18 -04:00
}
) ~ ~ ~ " );
2021-08-26 06:56:17 -04:00
} ;
2021-08-24 19:50:18 -04:00
2021-08-26 06:42:11 -04:00
// Emits a function named <enum_snake>_from_string that maps a string to an
// Optional <enum_title> value. The generated function holds a function-local
// static HashMap with one entry per element of `values` and returns an empty
// Optional when the string is not present in that map.
auto append_from_string = [ & ] ( StringView enum_title , StringView enum_snake , Vector < String > const & values ) {
2021-08-24 19:50:18 -04:00
generator . set ( " enum_title " , enum_title ) ;
generator . set ( " enum_snake " , enum_snake ) ;
generator . append ( R " ~~~(
Optional < @ enum_title @ > @ enum_snake @ _from_string ( StringView const & @ enum_snake @ )
{
2021-08-26 06:56:17 -04:00
static HashMap < StringView , @ enum_title @ > @ enum_snake @ _values { { ) ~ ~ ~ " );
2021-08-24 19:50:18 -04:00
for ( auto const & value : values ) {
// The map key is the raw string; the mapped enumerator name comes from
// format_identifier, which is defined outside this view.
generator . set ( " key " sv , value ) ;
generator . set ( " value " sv , format_identifier ( enum_title , value ) ) ;
generator . append ( R " ~~~(
{ " @key@ " sv , @ enum_title @ : : @ value @ } , ) ~ ~ ~ " );
}
// Close the map initializer and emit the lookup itself.
generator . append ( R " ~~~(
} } ;
if ( auto value = @ enum_snake @ _values . get ( @ enum_snake @ ) ; value . has_value ( ) )
return value . value ( ) ;
return { } ;
}
) ~ ~ ~ " );
} ;
2021-08-30 14:56:23 -04:00
// Emits a function named resolve_<enum_snake>_alias that maps a string to its
// alias. The generated function holds a function-local static HashMap with one
// { key, alias } entry per element of `aliases` and returns an empty Optional
// when the string has no entry.
auto append_alias_search = [ & ] ( StringView enum_snake , HashMap < String , String > const & aliases ) {
generator . set ( " enum_snake " , enum_snake ) ;
generator . append ( R " ~~~(
Optional < StringView > resolve_ @ enum_snake @ _alias ( StringView const & @ enum_snake @ )
{
2021-08-31 10:03:49 -04:00
static HashMap < StringView , StringView > @ enum_snake @ _aliases { {
) ~ ~ ~ " );
// Line wrapping of the generated initializer: a newline is appended after this many entries.
constexpr size_t max_values_per_row = 10 ;
size_t values_in_current_row = 0 ;
2021-08-30 14:56:23 -04:00
for ( auto const & alias : aliases ) {
2021-08-31 10:03:49 -04:00
// Separator before every entry except the first one of a row.
if ( values_in_current_row + + > 0 )
generator . append ( " " ) ;
2021-08-30 14:56:23 -04:00
generator . set ( " key " sv , alias . key ) ;
generator . set ( " alias " sv , alias . value ) ;
2021-08-31 10:03:49 -04:00
generator . append ( " { \" @key@ \" sv, \" @alias@ \" sv }, " ) ;
2021-08-30 14:56:23 -04:00
2021-08-31 10:03:49 -04:00
if ( values_in_current_row = = max_values_per_row ) {
generator . append ( " \n " ) ;
values_in_current_row = 0 ;
}
2021-08-30 14:56:23 -04:00
}
// Close the map initializer and emit the lookup itself.
generator . append ( R " ~~~(
} } ;
if ( auto alias = @ enum_snake @ _aliases . get ( @ enum_snake @ ) ; alias . has_value ( ) )
return alias . value ( ) ;
return { } ;
}
) ~ ~ ~ " );
} ;
2021-08-26 06:42:11 -04:00
append_from_string ( " Locale " sv , " locale " sv , locale_data . locales . keys ( ) ) ;
2021-08-26 08:17:01 -04:00
append_mapping_search ( " Language " sv , " language " sv , " s_languages " sv ) ;
2021-08-24 19:50:18 -04:00
append_from_string ( " Language " sv , " language " sv , locale_data . languages ) ;
2021-08-30 14:56:23 -04:00
append_alias_search ( " language " sv , locale_data . language_aliases ) ;
2021-08-26 06:56:17 -04:00
append_mapping_search ( " Territory " sv , " territory " sv , " s_territories " sv ) ;
2021-08-24 19:50:18 -04:00
append_from_string ( " Territory " sv , " territory " sv , locale_data . territories ) ;
2021-08-30 14:56:23 -04:00
append_alias_search ( " territory " sv , locale_data . territory_aliases ) ;
2021-08-24 19:50:18 -04:00
2021-08-26 08:29:39 -04:00
append_mapping_search ( " ScriptTag " sv , " script_tag " sv , " s_scripts " sv ) ;
append_from_string ( " ScriptTag " sv , " script_tag " sv , locale_data . scripts ) ;
2021-08-30 14:56:23 -04:00
append_alias_search ( " script_tag " sv , locale_data . script_aliases ) ;
2021-08-26 08:29:39 -04:00
2021-08-26 08:38:54 -04:00
append_mapping_search ( " Currency " sv , " currency " sv , " s_currencies " sv ) ;
append_from_string ( " Currency " sv , " currency " sv , locale_data . currencies ) ;
2021-09-10 09:56:11 -04:00
append_mapping_search ( " Key " sv , " key " sv , " s_keywords " sv ) ;
append_from_string ( " Key " sv , " key " sv , locale_data . keywords ) ;
2021-08-30 14:56:23 -04:00
append_alias_search ( " variant " sv , locale_data . variant_aliases ) ;
append_alias_search ( " subdivision " sv , locale_data . subdivision_aliases ) ;
2021-09-06 13:56:44 -04:00
append_from_string ( " ListPatternType " sv , " list_pattern_type " sv , locale_data . list_pattern_types ) ;
append_from_string ( " ListPatternStyle " sv , " list_pattern_style " sv , locale_data . list_pattern_styles ) ;
2021-08-24 19:50:18 -04:00
generator . append ( R " ~~~(
2021-09-06 13:56:44 -04:00
Optional < ListPatterns > get_locale_list_pattern_mapping ( StringView locale , StringView list_pattern_type , StringView list_pattern_style )
{
auto locale_value = locale_from_string ( locale ) ;
if ( ! locale_value . has_value ( ) )
return { } ;
auto type_value = list_pattern_type_from_string ( list_pattern_type ) ;
if ( ! type_value . has_value ( ) )
return { } ;
auto style_value = list_pattern_style_from_string ( list_pattern_style ) ;
if ( ! style_value . has_value ( ) )
return { } ;
auto locale_index = to_underlying ( * locale_value ) - 1 ; // Subtract 1 because 0 == Locale::None.
auto const & locale_list_patterns = s_list_patterns . at ( locale_index ) ;
for ( auto const & list_patterns : locale_list_patterns ) {
if ( ( list_patterns . type = = type_value ) & & ( list_patterns . style = = style_value ) )
return ListPatterns { list_patterns . start , list_patterns . middle , list_patterns . end , list_patterns . pair } ;
}
return { } ;
}
2021-08-31 10:03:49 -04:00
void resolve_complex_language_aliases ( Unicode : : LanguageID & language_id )
{
2021-09-02 17:46:35 -04:00
for ( auto const & map : s_complex_alias ) {
2021-08-31 10:03:49 -04:00
if ( ( map . key . language ! = language_id . language ) & & ( map . key . language ! = " und " sv ) )
continue ;
2021-09-02 17:46:35 -04:00
if ( ! map . key . script . is_empty ( ) & & ( map . key . script ! = language_id . script ) )
2021-08-31 10:03:49 -04:00
continue ;
2021-09-02 17:46:35 -04:00
if ( ! map . key . region . is_empty ( ) & & ( map . key . region ! = language_id . region ) )
2021-08-31 10:03:49 -04:00
continue ;
2021-09-02 17:46:35 -04:00
if ( ! map . key . matches_variants ( language_id . variants ) )
2021-08-31 10:03:49 -04:00
continue ;
2021-09-02 17:46:35 -04:00
auto alias = map . alias . to_unicode_language_id ( ) ;
2021-08-31 10:03:49 -04:00
if ( alias . language = = " und " sv )
alias . language = move ( language_id . language ) ;
2021-09-02 17:46:35 -04:00
if ( map . key . script . is_empty ( ) & & ! alias . script . has_value ( ) )
2021-08-31 10:03:49 -04:00
alias . script = move ( language_id . script ) ;
2021-09-02 17:46:35 -04:00
if ( map . key . region . is_empty ( ) & & ! alias . region . has_value ( ) )
2021-08-31 10:03:49 -04:00
alias . region = move ( language_id . region ) ;
2021-09-02 17:46:35 -04:00
if ( map . key . variants_size = = 0 & & alias . variants . is_empty ( ) )
2021-08-31 10:03:49 -04:00
alias . variants = move ( language_id . variants ) ;
language_id = move ( alias ) ;
break ;
}
}
2021-09-02 18:21:42 -04:00
Optional < Unicode : : LanguageID > add_likely_subtags ( Unicode : : LanguageID const & language_id )
{
// https://www.unicode.org/reports/tr35/#Likely_Subtags
auto const * likely_subtag = resolve_likely_subtag ( language_id ) ;
if ( likely_subtag = = nullptr )
return { } ;
auto maximized = language_id ;
auto const & key = likely_subtag - > key ;
auto const & alias = likely_subtag - > alias ;
if ( maximized . language = = " und " sv )
maximized . language = alias . language ;
if ( ! maximized . script . has_value ( ) | | ( ! key . script . is_empty ( ) & & ! alias . script . is_empty ( ) ) )
maximized . script = alias . script ;
if ( ! maximized . region . has_value ( ) | | ( ! key . region . is_empty ( ) & & ! alias . region . is_empty ( ) ) )
maximized . region = alias . region ;
return maximized ;
}
2021-08-31 09:40:24 -04:00
Optional < String > resolve_most_likely_territory ( Unicode : : LanguageID const & language_id )
{
if ( auto const * likely_subtag = resolve_likely_subtag ( language_id ) ; likely_subtag ! = nullptr )
2021-09-02 18:21:42 -04:00
return likely_subtag - > alias . region ;
2021-08-31 09:40:24 -04:00
return { } ;
}
2021-08-24 19:50:18 -04:00
}
}
) ~ ~ ~ " );
write_to_file_if_different ( file , generator . as_string_view ( ) ) ;
}
// Entry point of the generator. Parses the command-line options, opens the two
// output files, fills a UnicodeLocaleData via parse_all_locales and hands it to
// generate_unicode_locale_header and generate_unicode_locale_implementation.
// Returns 0 when it reaches the end; exits with status 1 if an output path is
// missing or an output file cannot be opened.
int main ( int argc , char * * argv )
{
// Option targets; each stays nullptr unless the matching option is supplied.
char const * generated_header_path = nullptr ;
char const * generated_implementation_path = nullptr ;
2021-08-30 08:56:28 -04:00
char const * core_path = nullptr ;
2021-08-24 19:50:18 -04:00
char const * locale_names_path = nullptr ;
2021-09-05 22:46:03 -04:00
char const * misc_path = nullptr ;
2021-08-26 08:31:31 -04:00
char const * numbers_path = nullptr ;
2021-08-24 19:50:18 -04:00
Core : : ArgsParser args_parser ;
args_parser . add_option ( generated_header_path , " Path to the Unicode locale header file to generate " , " generated-header-path " , ' h ' , " generated-header-path " ) ;
args_parser . add_option ( generated_implementation_path , " Path to the Unicode locale implementation file to generate " , " generated-implementation-path " , ' c ' , " generated-implementation-path " ) ;
2021-08-30 08:56:28 -04:00
args_parser . add_option ( core_path , " Path to cldr-core directory " , " core-path " , ' r ' , " core-path " ) ;
2021-08-24 19:50:18 -04:00
args_parser . add_option ( locale_names_path , " Path to cldr-localenames directory " , " locale-names-path " , ' l ' , " locale-names-path " ) ;
2021-09-05 22:46:03 -04:00
args_parser . add_option ( misc_path , " Path to cldr-misc directory " , " misc-path " , ' m ' , " misc-path " ) ;
2021-08-26 08:31:31 -04:00
args_parser . add_option ( numbers_path , " Path to cldr-numbers directory " , " numbers-path " , ' n ' , " numbers-path " ) ;
2021-08-24 19:50:18 -04:00
args_parser . parse ( argc , argv ) ;
// Opens `path` with `mode` and returns the opened file. An empty path prints
// a "required" message naming `flags` plus the usage text; an open failure
// prints the error. Both cases terminate the process with status 1.
auto open_file = [ & ] ( StringView path , StringView flags , Core : : OpenMode mode = Core : : OpenMode : : ReadOnly ) {
if ( path . is_empty ( ) ) {
warnln ( " {} is required " , flags ) ;
args_parser . print_usage ( stderr , argv [ 0 ] ) ;
exit ( 1 ) ;
}
auto file_or_error = Core : : File : : open ( path , mode ) ;
if ( file_or_error . is_error ( ) ) {
warnln ( " Failed to open {}: {} " , path , file_or_error . release_error ( ) ) ;
exit ( 1 ) ;
}
return file_or_error . release_value ( ) ;
} ;
// Both outputs are opened ReadWrite rather than with the ReadOnly default.
auto generated_header_file = open_file ( generated_header_path , " -h/--generated-header-path " , Core : : OpenMode : : ReadWrite ) ;
auto generated_implementation_file = open_file ( generated_implementation_path , " -c/--generated-implementation-path " , Core : : OpenMode : : ReadWrite ) ;
UnicodeLocaleData locale_data ;
2021-09-06 13:56:44 -04:00
// NOTE(review): unlike the two output paths, the four CLDR input paths are
// passed on without any emptiness check here - confirm parse_all_locales
// copes with options that were not supplied.
parse_all_locales ( core_path , locale_names_path , misc_path , numbers_path , locale_data ) ;
2021-08-24 19:50:18 -04:00
generate_unicode_locale_header ( generated_header_file , locale_data ) ;
generate_unicode_locale_implementation ( generated_implementation_file , locale_data ) ;
return 0 ;
}