/*
 * Copyright (c) 2022-2023, Tim Flynn <trflynn89@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
2022-09-02 10:27:46 -04:00
# include "../LibUnicode/GeneratorUtil.h" // FIXME: Move this somewhere common.
2023-12-16 17:49:34 +03:30
# include <AK/ByteString.h>
2022-01-26 08:56:09 -05:00
# include <AK/Format.h>
# include <AK/HashMap.h>
# include <AK/JsonObject.h>
# include <AK/JsonParser.h>
# include <AK/JsonValue.h>
# include <AK/LexicalPath.h>
# include <AK/SourceGenerator.h>
# include <AK/StringBuilder.h>
# include <LibCore/ArgsParser.h>
2023-03-15 15:38:20 +00:00
# include <LibCore/Directory.h>
2022-09-02 12:11:30 -04:00
# include <LibLocale/Locale.h>
# include <LibLocale/RelativeTimeFormat.h>
2022-01-26 08:56:09 -05:00
2022-01-26 10:49:58 -05:00
struct RelativeTimeFormat {
unsigned hash ( ) const
{
auto hash = time_unit . hash ( ) ;
hash = pair_int_hash ( hash , style . hash ( ) ) ;
hash = pair_int_hash ( hash , plurality . hash ( ) ) ;
hash = pair_int_hash ( hash , tense_or_number ) ;
hash = pair_int_hash ( hash , pattern ) ;
return hash ;
}
bool operator = = ( RelativeTimeFormat const & other ) const
{
return ( time_unit = = other . time_unit )
& & ( plurality = = other . plurality )
& & ( style = = other . style )
& & ( tense_or_number = = other . tense_or_number )
& & ( pattern = = other . pattern ) ;
}
2023-12-16 17:49:34 +03:30
ByteString time_unit ;
ByteString style ;
ByteString plurality ;
2022-11-18 11:04:33 -05:00
size_t tense_or_number { 0 } ;
size_t pattern { 0 } ;
2022-01-26 10:49:58 -05:00
} ;
template < >
struct AK : : Formatter < RelativeTimeFormat > : Formatter < FormatString > {
ErrorOr < void > format ( FormatBuilder & builder , RelativeTimeFormat const & format )
{
return Formatter < FormatString > : : format ( builder ,
2022-07-11 17:32:29 +00:00
" {{ TimeUnit::{}, Style::{}, PluralCategory::{}, {}, {} }} " sv ,
2022-01-26 10:49:58 -05:00
format . time_unit ,
format . style ,
format . plurality ,
format . tense_or_number ,
format . pattern ) ;
}
} ;
template < >
2023-11-08 20:29:12 +01:00
struct AK : : Traits < RelativeTimeFormat > : public DefaultTraits < RelativeTimeFormat > {
2022-01-26 10:49:58 -05:00
static unsigned hash ( RelativeTimeFormat const & format ) { return format . hash ( ) ; }
} ;
2022-09-02 11:48:05 -04:00
struct LocaleData {
2022-11-18 11:04:33 -05:00
Vector < size_t > time_units ;
2022-01-26 08:56:09 -05:00
} ;
2022-09-02 11:48:05 -04:00
struct CLDR {
2022-11-18 11:04:33 -05:00
UniqueStringStorage unique_strings ;
UniqueStorage < RelativeTimeFormat > unique_formats ;
2022-01-26 10:49:58 -05:00
2023-12-16 17:49:34 +03:30
HashMap < ByteString , LocaleData > locales ;
2022-01-26 08:56:09 -05:00
} ;
2023-12-16 17:49:34 +03:30
// Parses `<locale_dates_path>/dateFields.json` and appends every relative-time
// pattern found for a sanctioned unit to `locale.time_units`.
//
// The JSON is walked as main -> <locale directory name> -> dates -> fields. Each
// member of "fields" is named either "<unit>" or "<unit>-<style>"; a missing style
// is treated as "long". Returns an error if the file cannot be read; a missing
// intermediate object trips the Optional::value() check instead.
static ErrorOr<void> parse_date_fields(ByteString locale_dates_path, CLDR& cldr, LocaleData& locale)
{
    LexicalPath date_fields_path(move(locale_dates_path));
    date_fields_path = date_fields_path.append("dateFields.json"sv);

    auto date_fields = TRY(read_json_file(date_fields_path.string()));
    auto const& main_object = date_fields.as_object().get_object("main"sv).value();
    // The locale key inside "main" is the name of the directory containing the file.
    auto const& locale_object = main_object.get_object(date_fields_path.parent().basename()).value();
    auto const& dates_object = locale_object.get_object("dates"sv).value();
    auto const& fields_object = dates_object.get_object("fields"sv).value();

    auto is_sanctioned_unit = [](auto unit) {
        // This is a copy of the time units sanctioned for use within ECMA-402.
        // https://tc39.es/ecma402/#sec-singularrelativetimeunit
        return unit.is_one_of("second"sv, "minute"sv, "hour"sv, "day"sv, "week"sv, "month"sv, "quarter"sv, "year"sv);
    };

    // Builds one RelativeTimeFormat, de-duplicates it through the CLDR storages and
    // records the resulting index on the locale.
    auto parse_pattern = [&](auto unit, auto style, auto plurality, auto tense_or_number, auto const& pattern) {
        RelativeTimeFormat format {};
        format.time_unit = unit.to_titlecase_string();
        format.style = style.to_titlecase_string();
        format.plurality = plurality.to_titlecase_string();
        format.tense_or_number = cldr.unique_strings.ensure(tense_or_number);
        format.pattern = cldr.unique_strings.ensure(pattern.as_string());

        locale.time_units.append(cldr.unique_formats.ensure(move(format)));
    };

    fields_object.for_each_member([&](auto const& unit_and_style, auto const& patterns) {
        auto segments = unit_and_style.split_view('-');
        auto unit = segments[0];
        auto style = (segments.size() > 1) ? segments[1] : "long"sv;

        if (!is_sanctioned_unit(unit))
            return;

        patterns.as_object().for_each_member([&](auto const& type, auto const& pattern_value) {
            constexpr auto number_key = "relative-type-"sv;
            constexpr auto tense_key = "relativeTime-type-"sv;
            constexpr auto plurality_key = "relativeTimePattern-count-"sv;

            if (type.starts_with(number_key)) {
                // "relative-type-<number>": a single pattern string, stored with plurality "Other".
                auto number = type.substring_view(number_key.length());
                parse_pattern(unit, style, "Other"sv, number, pattern_value);
            } else if (type.starts_with(tense_key)) {
                // "relativeTime-type-<tense>": an object with one pattern per plural category.
                // The tense only depends on `type`, so compute it once for all members.
                auto tense = type.substring_view(tense_key.length());

                pattern_value.as_object().for_each_member([&](auto const& key, auto const& pattern) {
                    VERIFY(key.starts_with(plurality_key));
                    auto plurality = key.substring_view(plurality_key.length());

                    parse_pattern(unit, style, plurality, tense, pattern);
                });
            }
        });
    });

    return {};
}
2023-12-16 17:49:34 +03:30
// Iterates over every entry of `<dates_path>/main`, derives the locale name from
// the entry's path and parses its date fields into `cldr.locales`.
//
// Propagates any error from directory iteration, locale parsing or
// parse_date_fields.
static ErrorOr<void> parse_all_locales(ByteString dates_path, CLDR& cldr)
{
    // Rebuilds the locale name from only the language, script and region subtags
    // of the path's basename; empty script/region subtags are skipped.
    auto remove_variants_from_path = [&](ByteString path) -> ErrorOr<ByteString> {
        auto parsed_locale = TRY(CanonicalLanguageID::parse(cldr.unique_strings, LexicalPath::basename(path)));

        StringBuilder builder;
        builder.append(cldr.unique_strings.get(parsed_locale.language));
        if (auto script = cldr.unique_strings.get(parsed_locale.script); !script.is_empty())
            builder.appendff("-{}", script);
        if (auto region = cldr.unique_strings.get(parsed_locale.region); !region.is_empty())
            builder.appendff("-{}", region);

        return builder.to_byte_string();
    };

    TRY(Core::Directory::for_each_entry(TRY(String::formatted("{}/main", dates_path)), Core::DirIterator::SkipParentAndBaseDir, [&](auto& entry, auto& directory) -> ErrorOr<IterationDecision> {
        // Named differently from the `dates_path` parameter to avoid shadowing it.
        auto locale_dates_path = LexicalPath::join(directory.path().string(), entry.name).string();
        auto language = TRY(remove_variants_from_path(locale_dates_path));

        // Entries that reduce to the same name share one LocaleData.
        auto& locale = cldr.locales.ensure(language);
        TRY(parse_date_fields(move(locale_dates_path), cldr, locale));

        return IterationDecision::Continue;
    }));

    return {};
}
2023-05-03 18:45:18 -04:00
static ErrorOr < void > generate_unicode_locale_header ( Core : : InputBufferedFile & file , CLDR & )
2022-01-26 08:56:09 -05:00
{
StringBuilder builder ;
SourceGenerator generator { builder } ;
generator . append ( R " ~~~(
# pragma once
2022-09-02 12:11:30 -04:00
# include <LibLocale/Forward.h>
2022-01-26 08:56:09 -05:00
2022-09-02 12:01:10 -04:00
namespace Locale {
2022-01-26 08:56:09 -05:00
) ~ ~ ~ " );
generator . append ( R " ~~~(
}
) ~ ~ ~ " );
2023-03-01 16:28:32 +01:00
TRY ( file . write_until_depleted ( generator . as_string_view ( ) . bytes ( ) ) ) ;
2022-02-06 16:09:37 -05:00
return { } ;
2022-01-26 08:56:09 -05:00
}
2023-05-03 18:45:18 -04:00
static ErrorOr < void > generate_unicode_locale_implementation ( Core : : InputBufferedFile & file , CLDR & cldr )
2022-01-26 08:56:09 -05:00
{
StringBuilder builder ;
SourceGenerator generator { builder } ;
2022-11-18 11:04:33 -05:00
generator . set ( " string_index_type " sv , cldr . unique_strings . type_that_fits ( ) ) ;
generator . set ( " relative_time_format_index_type " sv , cldr . unique_formats . type_that_fits ( ) ) ;
2022-01-26 08:56:09 -05:00
generator . append ( R " ~~~(
# include <AK/Array.h>
# include <AK/StringView.h>
2022-01-26 10:49:58 -05:00
# include <AK/Vector.h>
2022-09-02 12:11:30 -04:00
# include <LibLocale/Locale.h>
# include <LibLocale/PluralRules.h>
# include <LibLocale/RelativeTimeFormat.h>
2022-09-02 11:04:53 -04:00
# include <LibLocale/RelativeTimeFormatData.h>
2022-01-26 08:56:09 -05:00
2022-09-02 12:01:10 -04:00
namespace Locale {
2022-01-26 08:56:09 -05:00
) ~ ~ ~ " );
2022-09-02 11:48:05 -04:00
cldr . unique_strings . generate ( generator ) ;
2022-01-26 08:56:09 -05:00
generator . append ( R " ~~~(
2022-01-26 10:49:58 -05:00
struct RelativeTimeFormatImpl {
RelativeTimeFormat to_relative_time_format ( ) const
{
RelativeTimeFormat relative_time_format { } ;
relative_time_format . plurality = plurality ;
2022-08-15 13:01:42 -04:00
relative_time_format . pattern = decode_string ( pattern ) ;
2022-01-26 10:49:58 -05:00
return relative_time_format ;
}
TimeUnit time_unit ;
Style style ;
2022-07-07 13:59:46 -04:00
PluralCategory plurality ;
2022-01-26 10:49:58 -05:00
@ string_index_type @ tense_or_number { 0 } ;
@ string_index_type @ pattern { 0 } ;
} ;
) ~ ~ ~ " );
2022-09-02 11:48:05 -04:00
cldr . unique_formats . generate ( generator , " RelativeTimeFormatImpl " sv , " s_relative_time_formats " sv , 10 ) ;
2022-01-26 10:49:58 -05:00
2023-12-16 17:49:34 +03:30
auto append_list = [ & ] ( ByteString name , auto const & list ) {
2022-01-26 10:49:58 -05:00
generator . set ( " name " , name ) ;
2023-12-16 17:49:34 +03:30
generator . set ( " size " , ByteString : : number ( list . size ( ) ) ) ;
2022-01-26 10:49:58 -05:00
generator . append ( R " ~~~(
static constexpr Array < @ relative_time_format_index_type @ , @ size @ > @ name @ { { ) ~ ~ ~ " );
bool first = true ;
for ( auto index : list ) {
2022-07-11 17:32:29 +00:00
generator . append ( first ? " " sv : " , " sv ) ;
2023-12-16 17:49:34 +03:30
generator . append ( ByteString : : number ( index ) ) ;
2022-01-26 10:49:58 -05:00
first = false ;
}
generator . append ( " } }; " ) ;
} ;
2022-11-18 11:04:33 -05:00
generate_mapping ( generator , cldr . locales , cldr . unique_formats . type_that_fits ( ) , " s_locale_relative_time_formats " sv , " s_number_systems_digits_{} " sv , nullptr , [ & ] ( auto const & name , auto const & value ) { append_list ( name , value . time_units ) ; } ) ;
2022-01-26 10:49:58 -05:00
generator . append ( R " ~~~(
2023-08-22 16:45:22 -04:00
Vector < RelativeTimeFormat > get_relative_time_format_patterns ( StringView locale , TimeUnit time_unit , StringView tense_or_number , Style style )
2022-01-26 10:49:58 -05:00
{
Vector < RelativeTimeFormat > formats ;
auto locale_value = locale_from_string ( locale ) ;
if ( ! locale_value . has_value ( ) )
return formats ;
auto locale_index = to_underlying ( * locale_value ) - 1 ; // Subtract 1 because 0 == Locale::None.
auto const & locale_formats = s_locale_relative_time_formats . at ( locale_index ) ;
for ( auto const & locale_format_index : locale_formats ) {
auto const & locale_format = s_relative_time_formats . at ( locale_format_index ) ;
if ( locale_format . time_unit ! = time_unit )
continue ;
if ( locale_format . style ! = style )
continue ;
2022-08-15 13:01:42 -04:00
if ( decode_string ( locale_format . tense_or_number ) ! = tense_or_number )
2022-01-26 10:49:58 -05:00
continue ;
2023-08-22 16:45:22 -04:00
formats . append ( locale_format . to_relative_time_format ( ) ) ;
2022-01-26 10:49:58 -05:00
}
return formats ;
}
2022-01-26 08:56:09 -05:00
}
) ~ ~ ~ " );
2023-03-01 16:28:32 +01:00
TRY ( file . write_until_depleted ( generator . as_string_view ( ) . bytes ( ) ) ) ;
2022-02-06 16:09:37 -05:00
return { } ;
2022-01-26 08:56:09 -05:00
}
// Entry point: parses the command line, opens both output files for writing,
// parses every locale under the cldr-dates directory, then writes the generated
// header and implementation. Returns 0 on success; any failure is propagated
// as an error.
ErrorOr<int> serenity_main(Main::Arguments arguments)
{
    StringView generated_header_path;
    StringView generated_implementation_path;
    StringView dates_path;

    Core::ArgsParser args_parser;
    args_parser.add_option(generated_header_path, "Path to the Unicode locale header file to generate", "generated-header-path", 'h', "generated-header-path");
    args_parser.add_option(generated_implementation_path, "Path to the Unicode locale implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
    args_parser.add_option(dates_path, "Path to cldr-dates directory", "dates-path", 'd', "dates-path");
    args_parser.parse(arguments);

    // Both outputs are opened before parsing, so an unwritable path fails before any CLDR work is done.
    auto generated_header_file = TRY(open_file(generated_header_path, Core::File::OpenMode::Write));
    auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::File::OpenMode::Write));

    CLDR cldr;
    TRY(parse_all_locales(dates_path, cldr));

    TRY(generate_unicode_locale_header(*generated_header_file, cldr));
    TRY(generate_unicode_locale_implementation(*generated_implementation_file, cldr));

    return 0;
}