2021-11-19 11:36:28 -05:00
/*
* Copyright ( c ) 2021 , Tim Flynn < trflynn89 @ pm . me >
*
* SPDX - License - Identifier : BSD - 2 - Clause
*/
# include "GeneratorUtil.h"
2021-11-30 08:59:19 -05:00
# include <AK/AllOf.h>
# include <AK/CharacterTypes.h>
2021-11-19 11:36:28 -05:00
# include <AK/Format.h>
2021-11-30 08:59:19 -05:00
# include <AK/GenericLexer.h>
2021-12-02 21:16:16 -05:00
# include <AK/HashFunctions.h>
2021-11-19 11:36:28 -05:00
# include <AK/HashMap.h>
# include <AK/JsonObject.h>
# include <AK/JsonParser.h>
# include <AK/JsonValue.h>
# include <AK/LexicalPath.h>
# include <AK/SourceGenerator.h>
# include <AK/String.h>
# include <AK/StringBuilder.h>
2021-12-02 21:16:16 -05:00
# include <AK/Traits.h>
2021-11-30 08:59:19 -05:00
# include <AK/Utf8View.h>
2021-11-19 11:36:28 -05:00
# include <LibCore/ArgsParser.h>
# include <LibCore/DirIterator.h>
# include <LibCore/File.h>
2021-11-27 14:54:48 -05:00
# include <LibUnicode/DateTimeFormat.h>
2021-11-19 11:36:28 -05:00
using StringIndexType = u16 ;
constexpr auto s_string_index_type = " u16 " sv ;
2021-12-02 21:16:16 -05:00
using CalendarPatternIndexType = u16 ;
constexpr auto s_calendar_pattern_index_type = " u16 " sv ;
2021-11-27 14:54:48 -05:00
struct CalendarPattern : public Unicode : : CalendarPattern {
2021-12-02 21:16:16 -05:00
unsigned hash ( ) const
{
2021-11-30 08:59:19 -05:00
auto hash = pair_int_hash ( pattern_index , pattern12_index ) ;
auto hash_field = [ & ] ( auto const & field ) {
if ( field . has_value ( ) )
hash = pair_int_hash ( hash , static_cast < u8 > ( * field ) ) ;
else
hash = pair_int_hash ( hash , - 1 ) ;
} ;
hash_field ( era ) ;
hash_field ( year ) ;
hash_field ( month ) ;
hash_field ( weekday ) ;
hash_field ( day ) ;
hash_field ( day_period ) ;
hash_field ( hour ) ;
hash_field ( minute ) ;
hash_field ( second ) ;
hash_field ( fractional_second_digits ) ;
hash_field ( time_zone_name ) ;
return hash ;
2021-12-02 21:16:16 -05:00
}
bool operator = = ( CalendarPattern const & other ) const
{
2021-11-30 08:59:19 -05:00
return ( pattern_index = = other . pattern_index )
& & ( pattern12_index = = other . pattern12_index )
& & ( era = = other . era )
& & ( year = = other . year )
& & ( month = = other . month )
& & ( weekday = = other . weekday )
& & ( day = = other . day )
& & ( day_period = = other . day_period )
& & ( hour = = other . hour )
& & ( minute = = other . minute )
& & ( second = = other . second )
& & ( fractional_second_digits = = other . fractional_second_digits )
& & ( time_zone_name = = other . time_zone_name ) ;
2021-12-02 21:16:16 -05:00
}
2021-11-27 14:54:48 -05:00
StringIndexType pattern_index { 0 } ;
2021-11-30 08:59:19 -05:00
StringIndexType pattern12_index { 0 } ;
2021-11-27 14:54:48 -05:00
} ;
2021-12-02 21:16:16 -05:00
template < >
struct AK : : Formatter < CalendarPattern > : Formatter < FormatString > {
ErrorOr < void > format ( FormatBuilder & builder , CalendarPattern const & pattern )
{
2021-11-30 08:59:19 -05:00
auto field_to_i8 = [ ] ( auto const & field ) - > i8 {
if ( ! field . has_value ( ) )
return - 1 ;
return static_cast < i8 > ( * field ) ;
} ;
return Formatter < FormatString > : : format ( builder ,
" {{ {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {} }} " ,
pattern . pattern_index ,
pattern . pattern12_index ,
field_to_i8 ( pattern . era ) ,
field_to_i8 ( pattern . year ) ,
field_to_i8 ( pattern . month ) ,
field_to_i8 ( pattern . weekday ) ,
field_to_i8 ( pattern . day ) ,
field_to_i8 ( pattern . day_period ) ,
field_to_i8 ( pattern . hour ) ,
field_to_i8 ( pattern . minute ) ,
field_to_i8 ( pattern . second ) ,
field_to_i8 ( pattern . fractional_second_digits ) ,
field_to_i8 ( pattern . time_zone_name ) ) ;
2021-12-02 21:16:16 -05:00
}
} ;
template < >
struct AK : : Traits < CalendarPattern > : public GenericTraits < CalendarPattern > {
static unsigned hash ( CalendarPattern const & c ) { return c . hash ( ) ; }
} ;
2021-11-27 14:54:48 -05:00
// One calendar pattern index for each of the four format lengths (full, long, medium, short).
struct CalendarFormat {
    CalendarPatternIndexType full_format { 0 };
    CalendarPatternIndexType long_format { 0 };
    CalendarPatternIndexType medium_format { 0 };
    CalendarPatternIndexType short_format { 0 };
};
struct Calendar {
StringIndexType calendar { 0 } ;
CalendarFormat date_formats { } ;
CalendarFormat time_formats { } ;
CalendarFormat date_time_formats { } ;
2021-12-02 21:16:16 -05:00
Vector < CalendarPatternIndexType > available_formats { } ;
2021-11-27 14:54:48 -05:00
} ;
2021-11-19 11:36:28 -05:00
struct Locale {
2021-11-27 14:54:48 -05:00
HashMap < String , Calendar > calendars ;
2021-11-19 11:36:28 -05:00
} ;
struct UnicodeLocaleData {
UniqueStringStorage < StringIndexType > unique_strings ;
2021-12-02 21:16:16 -05:00
UniqueStorage < CalendarPattern , CalendarPatternIndexType > unique_patterns ;
2021-11-19 11:36:28 -05:00
HashMap < String , Locale > locales ;
2021-11-28 09:25:15 -05:00
LibUnicode: Parse and generate regional hour cycles
Unlike most data in the CLDR, hour cycles are not stored on a per-locale
basis. Instead, they are keyed by a string that is usually a region, but
sometimes is a locale. Therefore, given a locale, to determine the hour
cycles for that locale, we:
1. Check if the locale itself is assigned hour cycles.
2. If the locale has a region, check if that region is assigned hour
cycles.
3. Otherwise, maximize that locale, and if the maximized locale has
a region, check if that region is assigned hour cycles.
4. If the above all fail, fallback to the "001" region.
Further, each locale's default hour cycle is the first assigned hour
cycle.
2021-11-27 20:57:21 -05:00
HashMap < String , Vector < Unicode : : HourCycle > > hour_cycles ;
Vector < String > hour_cycle_regions ;
2021-11-27 14:54:48 -05:00
Vector < String > calendars ;
2021-11-28 09:25:15 -05:00
Vector < Alias > calendar_aliases {
// FIXME: Aliases should come from BCP47. See: https://unicode-org.atlassian.net/browse/CLDR-15158
{ " gregorian " sv , " gregory " sv } ,
} ;
2021-11-19 11:36:28 -05:00
} ;
LibUnicode: Parse and generate regional hour cycles
Unlike most data in the CLDR, hour cycles are not stored on a per-locale
basis. Instead, they are keyed by a string that is usually a region, but
sometimes is a locale. Therefore, given a locale, to determine the hour
cycles for that locale, we:
1. Check if the locale itself is assigned hour cycles.
2. If the locale has a region, check if that region is assigned hour
cycles.
3. Otherwise, maximize that locale, and if the maximized locale has
a region, check if that region is assigned hour cycles.
4. If the above all fail, fall back to the "001" region.
Further, each locale's default hour cycle is the first assigned hour
cycle.
2021-11-27 20:57:21 -05:00
// Reads supplemental/timeData.json beneath core_path and, for every member of its "timeData"
// object, records the hour cycles listed in that member's "_allowed" string. Each member key is
// also added to locale_data.hour_cycle_regions if it is not already present.
//
// https://unicode.org/reports/tr35/tr35-dates.html#Time_Data
static ErrorOr<void> parse_hour_cycles(String core_path, UnicodeLocaleData& locale_data)
{
    auto const time_data_path = LexicalPath(move(core_path))
                                    .append("supplemental"sv)
                                    .append("timeData.json"sv);

    auto time_data_file = TRY(Core::File::open(time_data_path.string(), Core::OpenMode::ReadOnly));
    auto time_data = TRY(JsonValue::from_string(time_data_file->read_all()));

    auto const& supplemental_object = time_data.as_object().get("supplemental"sv);
    auto const& time_data_object = supplemental_object.as_object().get("timeData"sv);

    // Maps a single hour symbol to its hour cycle; any other string yields an empty Optional.
    auto to_hour_cycle = [](StringView symbol) -> Optional<Unicode::HourCycle> {
        if (symbol.length() != 1)
            return {};

        switch (symbol[0]) {
        case 'h':
            return Unicode::HourCycle::H12;
        case 'H':
            return Unicode::HourCycle::H23;
        case 'K':
            return Unicode::HourCycle::H11;
        case 'k':
            return Unicode::HourCycle::H24;
        default:
            return {};
        }
    };

    time_data_object.as_object().for_each_member([&](auto const& region, JsonValue const& entry) {
        auto allowed = entry.as_object().get("_allowed"sv).as_string();

        // Unrecognized symbols in the space-separated list are skipped.
        Vector<Unicode::HourCycle> hour_cycles;
        for (auto symbol : allowed.split_view(' ')) {
            auto parsed = to_hour_cycle(symbol);
            if (parsed.has_value())
                hour_cycles.append(*parsed);
        }

        locale_data.hour_cycles.set(region, move(hour_cycles));

        if (!locale_data.hour_cycle_regions.contains_slow(region))
            locale_data.hour_cycle_regions.append(region);
    });

    return {};
}
2021-11-30 08:59:19 -05:00
// Builds a predicate that reports whether its argument compares equal to `ch`.
static constexpr auto is_char(char ch)
{
    return [expected = ch](auto candidate) {
        return candidate == expected;
    };
}
// Patterns that are 12-hour aware are generated twice: once with the day period (e.g. {ampm}) and
// once without it. This produces the variant without the day period, taking care not to leave
// extra spaces behind. Some example expected removals:
//
//     "{hour}:{minute} {ampm}" becomes "{hour}:{minute}" (remove the space before {ampm})
//     "{ampm} {hour}" becomes "{hour}" (remove the space after {ampm})
//     "{hour}:{minute} {ampm} {timeZoneName}" becomes "{hour}:{minute} {timeZoneName}" (remove one of the spaces around {ampm})
static String remove_period_from_pattern(String pattern)
{
    constexpr u32 space = ' ';
    constexpr u32 open = '{';
    constexpr u32 close = '}';

    for (auto period : AK::Array { "({ampm})"sv, "{ampm}"sv, "({dayPeriod})"sv, "{dayPeriod}"sv }) {
        auto const found = pattern.find(period);
        if (!found.has_value())
            continue;

        auto const period_start = *found;
        auto const period_end = period_start + period.length();

        // Find the code points immediately before and after the day period, if there are any.
        Utf8View utf8_pattern { pattern };
        Optional<u32> code_point_before;
        Optional<u32> code_point_after;

        for (auto it = utf8_pattern.begin(); utf8_pattern.byte_offset_of(it) < period_start; ++it)
            code_point_before = *it;
        if (auto it = utf8_pattern.iterator_at_byte_offset(period_end); it != utf8_pattern.end())
            code_point_after = *it;

        // By default only the day period itself is cut out; one adjacent space is cut with it
        // when that space does not separate the day period from a neighboring placeholder.
        auto prefix_length = period_start;
        auto suffix_start = period_end;

        if ((code_point_before == space) && (code_point_after != open))
            --prefix_length;
        else if ((code_point_after == space) && (code_point_before != close))
            ++suffix_start;

        pattern = String::formatted("{}{}",
            pattern.substring_view(0, prefix_length),
            pattern.substring_view(suffix_start));
    }

    return pattern;
}
2021-12-02 21:16:16 -05:00
2021-11-30 08:59:19 -05:00
// Converts a CLDR date/time pattern into a CalendarPattern and returns that pattern's index in
// locale_data.unique_patterns.
//
// Each run of a repeated TR-35 field symbol is replaced with a "{field}" placeholder, and the
// style implied by the symbol and the run length is recorded on the pattern. Quoted literals and
// any other text are copied through. If the pattern is 12-hour aware, the complete text is stored
// as the 12-hour pattern and a copy without the day period is stored as the primary pattern.
//
// An empty Optional is returned for patterns that are dropped: those with quarter, week, or
// milliseconds-in-day fields, the deprecated 'l' month symbol, the 'j' / 'J' / 'C' hour symbols,
// or numeric / 2-digit local weekdays.
static Optional<CalendarPatternIndexType> parse_date_time_pattern(String pattern, UnicodeLocaleData& locale_data)
{
    // https://unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table
    using Unicode::CalendarPatternStyle;

    CalendarPattern format {};

    GenericLexer lexer { pattern };
    StringBuilder builder;
    bool hour12 { false };

    while (!lexer.is_eof()) {
        // Literal strings enclosed by quotes are to be appended to the pattern as-is without further
        // processing (this just avoids conflicts with the patterns below).
        if (lexer.next_is(is_quote)) {
            builder.append(lexer.consume_quoted_string());
            continue;
        }

        // A segment is a run of one repeated character, so every all_of() check below is
        // effectively a test of which character the segment is made of.
        auto starting_char = lexer.peek();
        auto segment = lexer.consume_while([&](char ch) { return ch == starting_char; });

        // Era
        if (all_of(segment, is_char('G'))) {
            builder.append("{era}");

            if (segment.length() <= 3)
                format.era = CalendarPatternStyle::Short;
            else if (segment.length() == 4)
                format.era = CalendarPatternStyle::Long;
            else
                format.era = CalendarPatternStyle::Narrow;
        }

        // Year
        else if (all_of(segment, is_any_of("yYuUr"sv))) {
            builder.append("{year}");

            if (segment.length() == 2)
                format.year = CalendarPatternStyle::TwoDigit;
            else
                format.year = CalendarPatternStyle::Numeric;
        }

        // Quarter
        else if (all_of(segment, is_any_of("qQ"sv))) {
            // Intl.DateTimeFormat does not support quarter formatting, so drop these patterns.
            return {};
        }

        // Month
        else if (all_of(segment, is_any_of("ML"sv))) {
            builder.append("{month}");

            if (segment.length() == 1)
                format.month = CalendarPatternStyle::Numeric;
            else if (segment.length() == 2)
                format.month = CalendarPatternStyle::TwoDigit;
            else if (segment.length() == 3)
                format.month = CalendarPatternStyle::Short;
            else if (segment.length() == 4)
                format.month = CalendarPatternStyle::Long;
            else if (segment.length() == 5)
                format.month = CalendarPatternStyle::Narrow;
        } else if (all_of(segment, is_char('l'))) {
            // Using 'l' for month formatting is deprecated by TR-35, ensure it is not used.
            return {};
        }

        // Week
        else if (all_of(segment, is_any_of("wW"sv))) {
            // Intl.DateTimeFormat does not support week formatting, so drop these patterns.
            return {};
        }

        // Day
        else if (all_of(segment, is_char('d'))) {
            builder.append("{day}");

            if (segment.length() == 1)
                format.day = CalendarPatternStyle::Numeric;
            else
                format.day = CalendarPatternStyle::TwoDigit;
        } else if (all_of(segment, is_any_of("DFg"sv))) {
            // Day of year (D), day of week in month (F), and modified Julian day (g). Note that
            // the uppercase 'G' is the era symbol, which is consumed by the era branch above.
            builder.append("{day}");
            format.day = CalendarPatternStyle::Numeric;
        }

        // Weekday
        else if (all_of(segment, is_char('E'))) {
            builder.append("{weekday}");

            if (segment.length() == 4)
                format.weekday = CalendarPatternStyle::Long;
            else if (segment.length() == 5)
                format.weekday = CalendarPatternStyle::Narrow;
            else
                format.weekday = CalendarPatternStyle::Short;
        } else if (all_of(segment, is_any_of("ec"sv))) {
            builder.append("{weekday}");

            // TR-35 defines "e", "c", and "cc" as numeric, and "ee" as 2-digit, but those
            // pattern styles are not supported by Intl.DateTimeFormat.
            if (segment.length() <= 2)
                return {};

            if (segment.length() == 4)
                format.weekday = CalendarPatternStyle::Long;
            else if (segment.length() == 5)
                format.weekday = CalendarPatternStyle::Narrow;
            else
                format.weekday = CalendarPatternStyle::Short;
        }

        // Period
        else if (all_of(segment, is_any_of("ab"sv))) {
            builder.append("{ampm}");
            hour12 = true;

            if (segment.length() == 4)
                format.day_period = CalendarPatternStyle::Long;
            else if (segment.length() == 5)
                format.day_period = CalendarPatternStyle::Narrow;
            else
                format.day_period = CalendarPatternStyle::Short;
        } else if (all_of(segment, is_char('B'))) {
            builder.append("{dayPeriod}");
            hour12 = true;

            if (segment.length() == 4)
                format.day_period = CalendarPatternStyle::Long;
            else if (segment.length() == 5)
                format.day_period = CalendarPatternStyle::Narrow;
            else
                format.day_period = CalendarPatternStyle::Short;
        }

        // Hour
        else if (all_of(segment, is_any_of("hHKk"sv))) {
            builder.append("{hour}");

            // Only 'h' and 'K' mark the pattern as 12-hour aware.
            if ((segment[0] == 'h') || (segment[0] == 'K'))
                hour12 = true;

            if (segment.length() == 1)
                format.hour = CalendarPatternStyle::Numeric;
            else
                format.hour = CalendarPatternStyle::TwoDigit;
        } else if (all_of(segment, is_any_of("jJC"sv))) {
            // TR-35 indicates these should not be used.
            return {};
        }

        // Minute
        else if (all_of(segment, is_char('m'))) {
            builder.append("{minute}");

            if (segment.length() == 1)
                format.minute = CalendarPatternStyle::Numeric;
            else
                format.minute = CalendarPatternStyle::TwoDigit;
        }

        // Second
        else if (all_of(segment, is_char('s'))) {
            builder.append("{second}");

            if (segment.length() == 1)
                format.second = CalendarPatternStyle::Numeric;
            else
                format.second = CalendarPatternStyle::TwoDigit;
        } else if (all_of(segment, is_char('S'))) {
            builder.append("{fractionalSecondDigits}");

            VERIFY(segment.length() <= 3);
            format.fractional_second_digits = static_cast<u8>(segment.length());
        } else if (all_of(segment, is_char('A'))) {
            // Intl.DateTimeFormat does not support millisecond formatting, so drop these patterns.
            return {};
        }

        // Zone
        else if (all_of(segment, is_any_of("zZOvVXx"))) {
            builder.append("{timeZoneName}");

            if (segment.length() < 4)
                format.time_zone_name = CalendarPatternStyle::Short;
            else
                format.time_zone_name = CalendarPatternStyle::Long;
        }

        // Non-patterns
        else {
            builder.append(segment);
        }
    }

    pattern = builder.build();

    if (hour12) {
        // The primary pattern omits the day period; the 12-hour pattern keeps it.
        auto pattern_without_period = remove_period_from_pattern(pattern);

        format.pattern_index = locale_data.unique_strings.ensure(move(pattern_without_period));
        format.pattern12_index = locale_data.unique_strings.ensure(move(pattern));
    } else {
        format.pattern_index = locale_data.unique_strings.ensure(move(pattern));
    }

    return locale_data.unique_patterns.ensure(move(format));
}
// Parses one CLDR calendar file (a file whose basename starts with "ca-") and stores the date,
// time, date-time, and available formats of every calendar it contains into `locale`. Files with
// any other basename are ignored. The locale's JSON key is taken from the name of the file's
// parent directory.
static ErrorOr<void> parse_calendars(String locale_calendars_path, UnicodeLocaleData& locale_data, Locale& locale)
{
    LexicalPath calendars_path(move(locale_calendars_path));
    if (!calendars_path.basename().starts_with("ca-"sv))
        return {};

    auto calendars_file = TRY(Core::File::open(calendars_path.string(), Core::OpenMode::ReadOnly));
    auto calendars = TRY(JsonValue::from_string(calendars_file->read_all()));

    auto const& main_object = calendars.as_object().get("main"sv);
    auto const& locale_object = main_object.as_object().get(calendars_path.parent().basename());
    auto const& dates_object = locale_object.as_object().get("dates"sv);
    auto const& calendars_object = dates_object.as_object().get("calendars"sv);

    // Fills in the four standard lengths, in the order: full, long, medium, short. Each of these
    // patterns is required to parse successfully.
    auto parse_patterns = [&](auto& formats, auto const& patterns_object) {
        auto pattern_index_for = [&](StringView length) {
            auto pattern = patterns_object.get(length);
            return parse_date_time_pattern(pattern.as_string(), locale_data).value();
        };

        formats.full_format = pattern_index_for("full"sv);
        formats.long_format = pattern_index_for("long"sv);
        formats.medium_format = pattern_index_for("medium"sv);
        formats.short_format = pattern_index_for("short"sv);
    };

    calendars_object.as_object().for_each_member([&](auto const& calendar_name, JsonValue const& value) {
        // The generic calendar is not a supported Unicode calendar key, so skip it:
        // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Locale/calendar#unicode_calendar_keys
        if (calendar_name == "generic"sv)
            return;

        auto& calendar = locale.calendars.ensure(calendar_name, [&]() {
            auto calendar_index = locale_data.unique_strings.ensure(calendar_name);
            return Calendar { .calendar = calendar_index };
        });

        if (!locale_data.calendars.contains_slow(calendar_name))
            locale_data.calendars.append(calendar_name);

        auto const& calendar_object = value.as_object();
        auto const& date_formats_object = calendar_object.get("dateFormats"sv);
        auto const& time_formats_object = calendar_object.get("timeFormats"sv);
        auto const& date_time_formats_object = calendar_object.get("dateTimeFormats"sv);

        parse_patterns(calendar.date_formats, date_formats_object.as_object());
        parse_patterns(calendar.time_formats, time_formats_object.as_object());
        parse_patterns(calendar.date_time_formats, date_time_formats_object.as_object());

        // Available formats that fail to parse are left out rather than treated as errors.
        auto const& available_formats = date_time_formats_object.as_object().get("availableFormats"sv);
        available_formats.as_object().for_each_member([&](auto const&, JsonValue const& pattern) {
            auto pattern_index = parse_date_time_pattern(pattern.as_string(), locale_data);
            if (pattern_index.has_value())
                calendar.available_formats.append(*pattern_index);
        });
    });

    return {};
}
LibUnicode: Parse and generate regional hour cycles
Unlike most data in the CLDR, hour cycles are not stored on a per-locale
basis. Instead, they are keyed by a string that is usually a region, but
sometimes is a locale. Therefore, given a locale, to determine the hour
cycles for that locale, we:
1. Check if the locale itself is assigned hour cycles.
2. If the locale has a region, check if that region is assigned hour
cycles.
3. Otherwise, maximize that locale, and if the maximized locale has
a region, check if that region is assigned hour cycles.
4. If the above all fail, fall back to the "001" region.
Further, each locale's default hour cycle is the first assigned hour
cycle.
2021-11-27 20:57:21 -05:00
// Parses the regional hour cycles found beneath core_path, then visits every locale directory
// beneath dates_path and parses each calendar file inside it. Locales are stored under a name
// built from their language, script, and region only.
static ErrorOr<void> parse_all_locales(String core_path, String dates_path, UnicodeLocaleData& locale_data)
{
    TRY(parse_hour_cycles(move(core_path), locale_data));
    auto dates_iterator = TRY(path_to_dir_iterator(move(dates_path)));

    // Rebuilds the locale name from the path's basename as "language[-script][-region]".
    auto remove_variants_from_path = [&](String path) -> ErrorOr<String> {
        auto parsed_locale = TRY(CanonicalLanguageID<StringIndexType>::parse(locale_data.unique_strings, LexicalPath::basename(path)));

        StringBuilder builder;
        builder.append(locale_data.unique_strings.get(parsed_locale.language));

        auto append_subtag = [&](auto subtag_index) {
            if (auto subtag = locale_data.unique_strings.get(subtag_index); !subtag.is_empty())
                builder.appendff("-{}", subtag);
        };
        append_subtag(parsed_locale.script);
        append_subtag(parsed_locale.region);

        return builder.build();
    };

    while (dates_iterator.has_next()) {
        auto locale_path = TRY(next_path_from_dir_iterator(dates_iterator));
        auto calendars_iterator = TRY(path_to_dir_iterator(locale_path, {}));

        auto language = TRY(remove_variants_from_path(locale_path));
        auto& locale = locale_data.locales.ensure(language);

        while (calendars_iterator.has_next()) {
            auto calendars_path = TRY(next_path_from_dir_iterator(calendars_iterator));
            TRY(parse_calendars(move(calendars_path), locale_data, locale));
        }
    }

    return {};
}
LibUnicode: Parse and generate regional hour cycles
Unlike most data in the CLDR, hour cycles are not stored on a per-locale
basis. Instead, they are keyed by a string that is usually a region, but
sometimes is a locale. Therefore, given a locale, to determine the hour
cycles for that locale, we:
1. Check if the locale itself is assigned hour cycles.
2. If the locale has a region, check if that region is assigned hour
cycles.
3. Otherwise, maximize that locale, and if the maximized locale has
a region, check if that region is assigned hour cycles.
4. If the above all fail, fall back to the "001" region.
Further, each locale's default hour cycle is the first assigned hour
cycle.
2021-11-27 20:57:21 -05:00
// Turns a data key into an identifier for the generated enums: dashes become underscores, an
// all-digit key is prefixed with the first character of `owner` and an underscore, and a key that
// starts with a lowercase ASCII letter has that letter uppercased.
static String format_identifier(StringView owner, String identifier)
{
    identifier = identifier.replace("-"sv, "_"sv, true);

    bool const is_numeric = all_of(identifier, is_ascii_digit);
    if (is_numeric)
        return String::formatted("{}_{}", owner[0], identifier);

    auto const first = identifier[0];
    if (!is_ascii_lower_alpha(first))
        return identifier;

    return String::formatted("{:c}{}", to_ascii_uppercase(first), identifier.substring_view(1));
}
// Writes the generated header to `file`. The header consists of a prelude (#pragma once, includes,
// and the opening of namespace Unicode), the Calendar and HourCycleRegion enums produced by
// generate_enum, and the declarations of the lookup functions in namespace Detail. The write is
// required to succeed (VERIFY).
static void generate_unicode_locale_header ( Core : : File & file , UnicodeLocaleData & locale_data )
2021-11-19 11:36:28 -05:00
{
StringBuilder builder ;
SourceGenerator generator { builder } ;
generator . append ( R " ~~~(
# pragma once
2021-11-27 14:54:48 -05:00
# include <AK/Optional.h>
2021-11-19 11:36:28 -05:00
# include <AK/StringView.h>
# include <LibUnicode/Forward.h>
namespace Unicode {
2021-11-27 14:54:48 -05:00
) ~ ~ ~ " );
2021-11-28 09:25:15 -05:00
// The Calendar enum is generated from the parsed calendar names and their aliases.
generate_enum ( generator , format_identifier , " Calendar " sv , { } , locale_data . calendars , locale_data . calendar_aliases ) ;
LibUnicode: Parse and generate regional hour cycles
Unlike most data in the CLDR, hour cycles are not stored on a per-locale
basis. Instead, they are keyed by a string that is usually a region, but
sometimes is a locale. Therefore, given a locale, to determine the hour
cycles for that locale, we:
1. Check if the locale itself is assigned hour cycles.
2. If the locale has a region, check if that region is assigned hour
cycles.
3. Otherwise, maximize that locale, and if the maximized locale has
a region, check if that region is assigned hour cycles.
4. If the above all fail, fallback to the "001" region.
Further, each locale's default hour cycle is the first assigned hour
cycle.
2021-11-27 20:57:21 -05:00
// The HourCycleRegion enum is generated from the keys collected while parsing hour cycles.
generate_enum ( generator , format_identifier , " HourCycleRegion " sv , { } , locale_data . hour_cycle_regions ) ;
2021-11-27 14:54:48 -05:00
generator . append ( R " ~~~(
namespace Detail {
Optional < Calendar > calendar_from_string ( StringView calendar ) ;
LibUnicode: Parse and generate regional hour cycles
Unlike most data in the CLDR, hour cycles are not stored on a per-locale
basis. Instead, they are keyed by a string that is usually a region, but
sometimes is a locale. Therefore, given a locale, to determine the hour
cycles for that locale, we:
1. Check if the locale itself is assigned hour cycles.
2. If the locale has a region, check if that region is assigned hour
cycles.
3. Otherwise, maximize that locale, and if the maximized locale has
a region, check if that region is assigned hour cycles.
4. If the above all fail, fallback to the "001" region.
Further, each locale's default hour cycle is the first assigned hour
cycle.
2021-11-27 20:57:21 -05:00
Optional < HourCycleRegion > hour_cycle_region_from_string ( StringView hour_cycle_region ) ;
Vector < Unicode : : HourCycle > get_regional_hour_cycles ( StringView region ) ;
2021-11-27 14:54:48 -05:00
Optional < Unicode : : CalendarFormat > get_calendar_date_format ( StringView locale , StringView calendar ) ;
Optional < Unicode : : CalendarFormat > get_calendar_time_format ( StringView locale , StringView calendar ) ;
Optional < Unicode : : CalendarFormat > get_calendar_date_time_format ( StringView locale , StringView calendar ) ;
2021-11-27 17:31:31 -05:00
Vector < Unicode : : CalendarPattern > get_calendar_available_formats ( StringView locale , StringView calendar ) ;
2021-11-27 14:54:48 -05:00
}
2021-11-19 11:36:28 -05:00
}
) ~ ~ ~ " );
VERIFY ( file . write ( generator . as_string_view ( ) ) ) ;
}
static void generate_unicode_locale_implementation ( Core : : File & file , UnicodeLocaleData & locale_data )
{
StringBuilder builder ;
SourceGenerator generator { builder } ;
generator . set ( " string_index_type " sv , s_string_index_type ) ;
2021-12-02 21:16:16 -05:00
generator . set ( " calendar_pattern_index_type " sv , s_calendar_pattern_index_type ) ;
2021-11-19 11:36:28 -05:00
generator . append ( R " ~~~(
# include <AK/Array.h>
2021-11-27 14:54:48 -05:00
# include <AK/BinarySearch.h>
# include <LibUnicode/DateTimeFormat.h>
# include <LibUnicode/Locale.h>
2021-11-19 11:36:28 -05:00
# include <LibUnicode/UnicodeDateTimeFormat.h>
namespace Unicode : : Detail {
) ~ ~ ~ " );
locale_data . unique_strings . generate ( generator ) ;
generator . append ( R " ~~~(
2021-11-27 14:54:48 -05:00
struct CalendarPattern {
Unicode : : CalendarPattern to_unicode_calendar_pattern ( ) const {
Unicode : : CalendarPattern calendar_pattern { } ;
2021-11-30 08:59:19 -05:00
2021-11-27 14:54:48 -05:00
calendar_pattern . pattern = s_string_list [ pattern ] ;
2021-11-30 08:59:19 -05:00
if ( pattern12 ! = 0 )
calendar_pattern . pattern12 = s_string_list [ pattern12 ] ;
if ( era ! = - 1 )
calendar_pattern . era = static_cast < Unicode : : CalendarPatternStyle > ( era ) ;
if ( year ! = - 1 )
calendar_pattern . year = static_cast < Unicode : : CalendarPatternStyle > ( year ) ;
if ( month ! = - 1 )
calendar_pattern . month = static_cast < Unicode : : CalendarPatternStyle > ( month ) ;
if ( weekday ! = - 1 )
calendar_pattern . weekday = static_cast < Unicode : : CalendarPatternStyle > ( weekday ) ;
if ( day ! = - 1 )
calendar_pattern . day = static_cast < Unicode : : CalendarPatternStyle > ( day ) ;
if ( day_period ! = - 1 )
calendar_pattern . day_period = static_cast < Unicode : : CalendarPatternStyle > ( day_period ) ;
if ( hour ! = - 1 )
calendar_pattern . hour = static_cast < Unicode : : CalendarPatternStyle > ( hour ) ;
if ( minute ! = - 1 )
calendar_pattern . minute = static_cast < Unicode : : CalendarPatternStyle > ( minute ) ;
if ( second ! = - 1 )
calendar_pattern . second = static_cast < Unicode : : CalendarPatternStyle > ( second ) ;
if ( fractional_second_digits ! = - 1 )
calendar_pattern . fractional_second_digits = static_cast < u8 > ( fractional_second_digits ) ;
if ( time_zone_name ! = - 1 )
calendar_pattern . time_zone_name = static_cast < Unicode : : CalendarPatternStyle > ( time_zone_name ) ;
2021-11-27 14:54:48 -05:00
return calendar_pattern ;
}
@ string_index_type @ pattern { 0 } ;
2021-11-30 08:59:19 -05:00
@ string_index_type @ pattern12 { 0 } ;
i8 era { - 1 } ;
i8 year { - 1 } ;
i8 month { - 1 } ;
i8 weekday { - 1 } ;
i8 day { - 1 } ;
i8 day_period { - 1 } ;
i8 hour { - 1 } ;
i8 minute { - 1 } ;
i8 second { - 1 } ;
i8 fractional_second_digits { - 1 } ;
i8 time_zone_name { - 1 } ;
2021-11-27 14:54:48 -05:00
} ;
2021-12-02 21:16:16 -05:00
) ~ ~ ~ " );
2021-11-27 14:54:48 -05:00
2021-11-30 08:59:19 -05:00
locale_data . unique_patterns . generate ( generator , " CalendarPattern " sv , " s_calendar_patterns " sv , 10 ) ;
2021-12-02 21:16:16 -05:00
generator . append ( R " ~~~(
2021-11-27 14:54:48 -05:00
struct CalendarFormat {
Unicode : : CalendarFormat to_unicode_calendar_format ( ) const {
Unicode : : CalendarFormat calendar_format { } ;
2021-12-02 21:16:16 -05:00
calendar_format . full_format = s_calendar_patterns [ full_format ] . to_unicode_calendar_pattern ( ) ;
calendar_format . long_format = s_calendar_patterns [ long_format ] . to_unicode_calendar_pattern ( ) ;
calendar_format . medium_format = s_calendar_patterns [ medium_format ] . to_unicode_calendar_pattern ( ) ;
calendar_format . short_format = s_calendar_patterns [ short_format ] . to_unicode_calendar_pattern ( ) ;
2021-11-27 14:54:48 -05:00
return calendar_format ;
}
2021-12-02 21:16:16 -05:00
@ calendar_pattern_index_type @ full_format { 0 } ;
@ calendar_pattern_index_type @ long_format { 0 } ;
@ calendar_pattern_index_type @ medium_format { 0 } ;
@ calendar_pattern_index_type @ short_format { 0 } ;
2021-11-27 14:54:48 -05:00
} ;
struct CalendarData {
@ string_index_type @ calendar { 0 } ;
CalendarFormat date_formats { } ;
CalendarFormat time_formats { } ;
CalendarFormat date_time_formats { } ;
2021-12-02 21:16:16 -05:00
Span < @ calendar_pattern_index_type @ const > available_formats { } ;
2021-11-27 14:54:48 -05:00
} ;
) ~ ~ ~ " );
auto append_calendar_format = [ & ] ( auto const & calendar_format ) {
2021-12-02 21:16:16 -05:00
generator . set ( " full_format " , String : : number ( calendar_format . full_format ) ) ;
generator . set ( " long_format " , String : : number ( calendar_format . long_format ) ) ;
generator . set ( " medium_format " , String : : number ( calendar_format . medium_format ) ) ;
generator . set ( " short_format " , String : : number ( calendar_format . short_format ) ) ;
generator . append ( " { @full_format@, @long_format@, @medium_format@, @short_format@ }, " ) ;
2021-11-27 14:54:48 -05:00
} ;
auto append_calendars = [ & ] ( String name , auto const & calendars ) {
2021-11-30 21:14:44 -05:00
auto format_name = [ & ] ( StringView calendar_key ) {
return String : : formatted ( " {}_{}_formats " , name , calendar_key ) ;
} ;
for ( auto const & calendar_key : locale_data . calendars ) {
auto const & calendar = calendars . find ( calendar_key ) - > value ;
generator . set ( " name " , format_name ( calendar_key ) ) ;
generator . set ( " size " , String : : number ( calendar . available_formats . size ( ) ) ) ;
generator . append ( R " ~~~(
2021-12-02 21:16:16 -05:00
static constexpr Array < @ calendar_pattern_index_type @ , @ size @ > @ name @ { { ) ~ ~ ~ " );
2021-11-30 21:14:44 -05:00
2021-12-02 21:16:16 -05:00
bool first = true ;
for ( auto format : calendar . available_formats ) {
generator . append ( first ? " " : " , " ) ;
generator . append ( String : : number ( format ) ) ;
first = false ;
2021-11-30 21:14:44 -05:00
}
2021-12-02 21:16:16 -05:00
generator . append ( " } }; " ) ;
2021-11-30 21:14:44 -05:00
}
2021-11-27 14:54:48 -05:00
generator . set ( " name " , name ) ;
generator . set ( " size " , String : : number ( calendars . size ( ) ) ) ;
generator . append ( R " ~~~(
static constexpr Array < CalendarData , @ size @ > @ name @ { { ) ~ ~ ~ " );
for ( auto const & calendar_key : locale_data . calendars ) {
auto const & calendar = calendars . find ( calendar_key ) - > value ;
2021-11-30 21:14:44 -05:00
generator . set ( " name " , format_name ( calendar_key ) ) ;
2021-11-27 14:54:48 -05:00
generator . set ( " calendar " sv , String : : number ( calendar . calendar ) ) ;
generator . append ( R " ~~~(
{ @ calendar @ , ) ~ ~ ~ " );
append_calendar_format ( calendar . date_formats ) ;
generator . append ( " " ) ;
append_calendar_format ( calendar . time_formats ) ;
generator . append ( " " ) ;
append_calendar_format ( calendar . date_time_formats ) ;
2021-11-30 21:14:44 -05:00
generator . append ( " @name@.span() }, " ) ;
2021-11-27 14:54:48 -05:00
}
generator . append ( R " ~~~(
} } ;
) ~ ~ ~ " );
} ;
2021-11-28 10:39:55 -05:00
auto append_hour_cycles = [ & ] ( String name , auto const & hour_cycle_region ) {
auto const & hour_cycles = locale_data . hour_cycles . find ( hour_cycle_region ) - > value ;
LibUnicode: Parse and generate regional hour cycles
Unlike most data in the CLDR, hour cycles are not stored on a per-locale
basis. Instead, they are keyed by a string that is usually a region, but
sometimes is a locale. Therefore, given a locale, to determine the hour
cycles for that locale, we:
1. Check if the locale itself is assigned hour cycles.
2. If the locale has a region, check if that region is assigned hour
cycles.
3. Otherwise, maximize that locale, and if the maximized locale has
a region, check if that region is assigned hour cycles.
4. If the above all fail, fallback to the "001" region.
Further, each locale's default hour cycle is the first assigned hour
cycle.
2021-11-27 20:57:21 -05:00
generator . set ( " name " , name ) ;
generator . set ( " size " , String : : number ( hour_cycles . size ( ) ) ) ;
generator . append ( R " ~~~(
static constexpr Array < u8 , @ size @ > @ name @ { { ) ~ ~ ~ " );
for ( auto hour_cycle : hour_cycles ) {
generator . set ( " hour_cycle " , String : : number ( static_cast < u8 > ( hour_cycle ) ) ) ;
generator . append ( " @hour_cycle@, " ) ;
}
generator . append ( " } }; " ) ;
} ;
2021-11-27 14:54:48 -05:00
generate_mapping ( generator , locale_data . locales , " CalendarData " sv , " s_calendars " sv , " s_calendars_{} " , [ & ] ( auto const & name , auto const & value ) { append_calendars ( name , value . calendars ) ; } ) ;
2021-11-28 10:39:55 -05:00
generate_mapping ( generator , locale_data . hour_cycle_regions , " u8 " sv , " s_hour_cycles " sv , " s_hour_cycles_{} " , [ & ] ( auto const & name , auto const & value ) { append_hour_cycles ( name , value ) ; } ) ;
2021-11-27 14:54:48 -05:00
LibUnicode: Parse and generate regional hour cycles
Unlike most data in the CLDR, hour cycles are not stored on a per-locale
basis. Instead, they are keyed by a string that is usually a region, but
sometimes is a locale. Therefore, given a locale, to determine the hour
cycles for that locale, we:
1. Check if the locale itself is assigned hour cycles.
2. If the locale has a region, check if that region is assigned hour
cycles.
3. Otherwise, maximize that locale, and if the maximized locale has
a region, check if that region is assigned hour cycles.
4. If the above all fail, fallback to the "001" region.
Further, each locale's default hour cycle is the first assigned hour
cycle.
2021-11-27 20:57:21 -05:00
auto append_from_string = [ & ] ( StringView enum_title , StringView enum_snake , auto const & values , Vector < Alias > const & aliases = { } ) {
2021-11-27 14:54:48 -05:00
HashValueMap < String > hashes ;
hashes . ensure_capacity ( values . size ( ) ) ;
for ( auto const & value : values )
hashes . set ( value . hash ( ) , format_identifier ( enum_title , value ) ) ;
2021-11-28 09:25:15 -05:00
for ( auto const & alias : aliases )
hashes . set ( alias . alias . hash ( ) , format_identifier ( enum_title , alias . alias ) ) ;
2021-11-27 14:54:48 -05:00
generate_value_from_string ( generator , " {}_from_string " sv , enum_title , enum_snake , move ( hashes ) ) ;
} ;
2021-11-28 09:25:15 -05:00
append_from_string ( " Calendar " sv , " calendar " sv , locale_data . calendars , locale_data . calendar_aliases ) ;
LibUnicode: Parse and generate regional hour cycles
Unlike most data in the CLDR, hour cycles are not stored on a per-locale
basis. Instead, they are keyed by a string that is usually a region, but
sometimes is a locale. Therefore, given a locale, to determine the hour
cycles for that locale, we:
1. Check if the locale itself is assigned hour cycles.
2. If the locale has a region, check if that region is assigned hour
cycles.
3. Otherwise, maximize that locale, and if the maximized locale has
a region, check if that region is assigned hour cycles.
4. If the above all fail, fallback to the "001" region.
Further, each locale's default hour cycle is the first assigned hour
cycle.
2021-11-27 20:57:21 -05:00
append_from_string ( " HourCycleRegion " sv , " hour_cycle_region " sv , locale_data . hour_cycle_regions ) ;
2021-11-27 14:54:48 -05:00
generator . append ( R " ~~~(
LibUnicode: Parse and generate regional hour cycles
Unlike most data in the CLDR, hour cycles are not stored on a per-locale
basis. Instead, they are keyed by a string that is usually a region, but
sometimes is a locale. Therefore, given a locale, to determine the hour
cycles for that locale, we:
1. Check if the locale itself is assigned hour cycles.
2. If the locale has a region, check if that region is assigned hour
cycles.
3. Otherwise, maximize that locale, and if the maximized locale has
a region, check if that region is assigned hour cycles.
4. If the above all fail, fallback to the "001" region.
Further, each locale's default hour cycle is the first assigned hour
cycle.
2021-11-27 20:57:21 -05:00
Vector < Unicode : : HourCycle > get_regional_hour_cycles ( StringView region )
{
auto region_value = hour_cycle_region_from_string ( region ) ;
if ( ! region_value . has_value ( ) )
return { } ;
auto region_index = to_underlying ( * region_value ) ;
auto const & regional_hour_cycles = s_hour_cycles . at ( region_index ) ;
Vector < Unicode : : HourCycle > hour_cycles ;
hour_cycles . ensure_capacity ( regional_hour_cycles . size ( ) ) ;
for ( auto hour_cycle : regional_hour_cycles )
hour_cycles . unchecked_append ( static_cast < Unicode : : HourCycle > ( hour_cycle ) ) ;
return hour_cycles ;
}
2021-11-27 14:54:48 -05:00
static CalendarData const * find_calendar_data ( StringView locale , StringView calendar )
{
auto locale_value = locale_from_string ( locale ) ;
if ( ! locale_value . has_value ( ) )
return nullptr ;
auto calendar_value = calendar_from_string ( calendar ) ;
if ( ! calendar_value . has_value ( ) )
return nullptr ;
auto locale_index = to_underlying ( * locale_value ) - 1 ; // Subtract 1 because 0 == Locale::None.
auto calendar_index = to_underlying ( * calendar_value ) ;
auto const & calendars = s_calendars . at ( locale_index ) ;
return & calendars [ calendar_index ] ;
}
Optional < Unicode : : CalendarFormat > get_calendar_date_format ( StringView locale , StringView calendar )
{
if ( auto const * data = find_calendar_data ( locale , calendar ) ; data ! = nullptr )
return data - > date_formats . to_unicode_calendar_format ( ) ;
return { } ;
}
Optional < Unicode : : CalendarFormat > get_calendar_time_format ( StringView locale , StringView calendar )
{
if ( auto const * data = find_calendar_data ( locale , calendar ) ; data ! = nullptr )
return data - > time_formats . to_unicode_calendar_format ( ) ;
return { } ;
}
Optional < Unicode : : CalendarFormat > get_calendar_date_time_format ( StringView locale , StringView calendar )
{
if ( auto const * data = find_calendar_data ( locale , calendar ) ; data ! = nullptr )
return data - > date_time_formats . to_unicode_calendar_format ( ) ;
return { } ;
}
2021-11-27 17:31:31 -05:00
Vector < Unicode : : CalendarPattern > get_calendar_available_formats ( StringView locale , StringView calendar )
{
Vector < Unicode : : CalendarPattern > result { } ;
if ( auto const * data = find_calendar_data ( locale , calendar ) ; data ! = nullptr ) {
2021-11-30 21:14:44 -05:00
result . ensure_capacity ( data - > available_formats . size ( ) ) ;
2021-11-27 17:31:31 -05:00
2021-11-30 21:14:44 -05:00
for ( auto const & format : data - > available_formats )
2021-12-02 21:16:16 -05:00
result . unchecked_append ( s_calendar_patterns [ format ] . to_unicode_calendar_pattern ( ) ) ;
2021-11-27 17:31:31 -05:00
}
return result ;
}
2021-11-19 11:36:28 -05:00
}
) ~ ~ ~ " );
VERIFY ( file . write ( generator . as_string_view ( ) ) ) ;
}
// Entry point of the generator.
//
// Reads four mandatory command-line options (the two output file paths and the
// cldr-core / cldr-dates input directories), loads the locale data through
// parse_all_locales(), and then writes the generated header and implementation
// files. Returns 0 on success; any failure is propagated to the caller as an Error.
ErrorOr<int> serenity_main(Main::Arguments arguments)
{
    StringView generated_header_path;
    StringView generated_implementation_path;
    StringView core_path;
    StringView dates_path;

    Core::ArgsParser args_parser;
    args_parser.add_option(generated_header_path, "Path to the Unicode locale header file to generate", "generated-header-path", 'h', "generated-header-path");
    args_parser.add_option(generated_implementation_path, "Path to the Unicode locale implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
    args_parser.add_option(core_path, "Path to cldr-core directory", "core-path", 'r', "core-path");
    args_parser.add_option(dates_path, "Path to cldr-dates directory", "dates-path", 'd', "dates-path");
    args_parser.parse(arguments);

    // Every option is required. Validate all of them up front, including the CLDR
    // input directories, so that no output file is opened when the invocation is
    // incomplete and the user always gets the usage text instead of a later failure.
    auto const any_option_missing = generated_header_path.is_empty()
        || generated_implementation_path.is_empty()
        || core_path.is_empty()
        || dates_path.is_empty();

    if (any_option_missing) {
        args_parser.print_usage(stderr, arguments.argv[0]);
        return Error::from_string_literal("Must provide all command line options"sv);
    }

    // The generated files are opened read-write, matching the mode the generator
    // functions below have always been handed.
    auto open_file = [](StringView path) -> ErrorOr<NonnullRefPtr<Core::File>> {
        return Core::File::open(path, Core::OpenMode::ReadWrite);
    };

    auto generated_header_file = TRY(open_file(generated_header_path));
    auto generated_implementation_file = TRY(open_file(generated_implementation_path));

    UnicodeLocaleData locale_data;
    TRY(parse_all_locales(core_path, dates_path, locale_data));

    generate_unicode_locale_header(generated_header_file, locale_data);
    generate_unicode_locale_implementation(generated_implementation_file, locale_data);

    return 0;
}