2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								/*
 
							 
						 
					
						
							
								
									
										
										
										
											2022-01-31 13:07:22 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								 *  Copyright  ( c )  2021 ,  Tim  Flynn  < trflynn89 @ serenityos . org > 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								 * 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								 *  SPDX - License - Identifier :  BSD - 2 - Clause 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								 */ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-10-12 12:22:47 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								# include  "GeneratorUtil.h" 
 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 10:39:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								# include  <AK/AllOf.h> 
 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								# include  <AK/Array.h> 
 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 10:39:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								# include  <AK/CharacterTypes.h> 
 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 15:00:05 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								# include  <AK/Find.h> 
 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 17:37:47 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								# include  <AK/HashMap.h> 
 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								# include  <AK/Optional.h> 
 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 10:39:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								# include  <AK/QuickSort.h> 
 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								# include  <AK/SourceGenerator.h> 
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								# include  <AK/String.h> 
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								# include  <AK/StringUtils.h> 
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								# include  <AK/Types.h> 
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								# include  <AK/Vector.h> 
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								# include  <LibCore/ArgsParser.h> 
 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-06 20:13:12 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								# include  <LibCore/Stream.h> 
 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-08-17 09:52:26 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								using  StringIndexType  =  u16 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								constexpr  auto  s_string_index_type  =  " u16 " sv ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 17:37:47 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								// UnicodeData.txt does not list every code point individually. Some are only described as a
								// "range" of code points, marked by a pair of entries whose "name" fields end in "First" and
								// "Last". For example:
								//     3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
								//     4DBF;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
								//
								// Both bounds appear to be inclusive (e.g. the ASCII property is stored as { 0, 0x7f }).
								struct CodePointRange {
								    // Zero-initialized so that a default-constructed range never holds indeterminate values,
								    // matching how the other structures in this file initialize their scalar members.
								    u32 first { 0 };
								    u32 last { 0 };
								};
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 10:39:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								// Source data: https://www.unicode.org/Public/13.0.0/ucd/SpecialCasing.txt
								// Meaning of each field: https://www.unicode.org/reports/tr44/tr44-13.html#SpecialCasing.txt
								struct SpecialCasing {
								    // NOTE(review): presumably the value stored in CodePointData::special_casing_indices - confirm.
								    u32 index { 0 };
								    u32 code_point { 0 };

								    // One mapping per case form; each is a list of u32 values.
								    Vector<u32> lowercase_mapping;
								    Vector<u32> uppercase_mapping;
								    Vector<u32> titlecase_mapping;

								    // Left empty by default.
								    String locale;
								    String condition;
								};
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-10-02 22:57:22 -03:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								// Field descriptions: https://www.unicode.org/reports/tr44/#Character_Decomposition_Mappings
								struct CodePointDecomposition {
								    // `tag` is a string since it's used for codegen as an enum value.
								    String tag { "Canonical"sv };
								    // NOTE(review): index/size presumably describe a slice of UnicodeData::decomposition_mappings -
								    // confirm against the parsing code.
								    size_t decomposition_index { 0 };
								    size_t decomposition_size { 0 };
								};
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 17:37:47 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								// Source data: https://www.unicode.org/Public/13.0.0/ucd/PropList.txt
								// Description of the properties: https://www.unicode.org/reports/tr44/tr44-13.html#PropList.txt
								//
								// Maps a property name to the list of code point ranges recorded for it.
								using PropList = HashMap<String, Vector<CodePointRange>>;
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 15:29:28 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								// Source data: https://www.unicode.org/Public/13.0.0/ucd/DerivedNormalizationProps.txt
								// Description of the properties: https://www.unicode.org/reports/tr44/#DerivedNormalizationProps.txt
								//
								// Three-state answer attached to a Normalization entry.
								enum class QuickCheck {
								    Yes,
								    No,
								    Maybe,
								};
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								// A single normalization property entry: the code point range it applies to, an associated
								// list of u32 values, and a quick-check answer that defaults to QuickCheck::Yes.
								struct Normalization {
								    CodePointRange code_point_range;
								    Vector<u32> value;
								    QuickCheck quick_check { QuickCheck::Yes };
								};
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								// Maps a normalization property name to the Normalization entries recorded for it.
								using NormalizationProps = HashMap<String, Vector<Normalization>>;
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-11-23 08:24:13 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								struct  CodePointName  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    CodePointRange  code_point_range ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-08-17 09:52:26 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    StringIndexType  name  {  0  } ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-11-23 08:24:13 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								// UnicodeData source: https://www.unicode.org/Public/13.0.0/ucd/UnicodeData.txt
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								// Field descriptions: https://www.unicode.org/reports/tr44/tr44-13.html#UnicodeData.txt
 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 22:21:53 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								//                     https://www.unicode.org/reports/tr44/#General_Category_Values
 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								struct  CodePointData  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    u32  code_point  {  0  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    String  name ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-08-17 09:52:26 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    Optional < StringIndexType >  abbreviation ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								    u8  canonical_combining_class  {  0  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    String  bidi_class ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-10-02 22:57:22 -03:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    Optional < CodePointDecomposition >  decomposition_mapping ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								    Optional < i8 >  numeric_value_decimal ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    Optional < i8 >  numeric_value_digit ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    Optional < i8 >  numeric_value_numeric ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    bool  bidi_mirrored  {  false  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    String  unicode_1_name ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    String  iso_comment ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    Optional < u32 >  simple_uppercase_mapping ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    Optional < u32 >  simple_lowercase_mapping ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    Optional < u32 >  simple_titlecase_mapping ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 10:39:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    Vector < u32 >  special_casing_indices ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-02-13 13:39:19 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								struct  BlockName  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    CodePointRange  code_point_range ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-08-17 09:52:26 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    StringIndexType  name  {  0  } ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-13 13:39:19 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								struct  UnicodeData  { 
							 
						 
					
						
							
								
									
										
										
										
											2022-08-17 09:52:26 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    UniqueStringStorage < StringIndexType >  unique_strings ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 19:14:23 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    u32  code_points_with_non_zero_combining_class  {  0  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-10-02 22:57:22 -03:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    u32  code_points_with_decomposition_mapping  {  0  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    Vector < u32 >  decomposition_mappings ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    Vector < String >  compatibility_tags ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:12:57 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    u32  simple_uppercase_mapping_size  {  0  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    u32  simple_lowercase_mapping_size  {  0  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 10:39:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    Vector < SpecialCasing >  special_casing ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:52:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    u32  code_points_with_special_casing  {  0  } ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 10:39:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    u32  largest_casing_transform_size  {  0  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    u32  largest_special_casing_size  {  0  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    Vector < String >  conditions ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-09-02 12:16:00 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    Vector < String >  locales ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 10:39:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								    Vector < CodePointData >  code_point_data ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 17:37:47 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-08-17 09:52:26 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    HashMap < u32 ,  StringIndexType >  code_point_abbreviations ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    HashMap < u32 ,  StringIndexType >  code_point_display_name_aliases ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-11-23 08:24:13 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    Vector < CodePointName >  code_point_display_names ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 07:17:24 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    PropList  general_categories ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-31 13:45:05 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    Vector < Alias >  general_category_aliases ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-07-31 16:21:01 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    // The Unicode standard defines additional properties (Any, Assigned, ASCII) which are not in
 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 08:27:28 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    // any UCD file. Assigned code point ranges are derived as this generator is executed.
 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-31 16:21:01 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    // https://unicode.org/reports/tr18/#General_Category_Property
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    PropList  prop_list  { 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 08:27:28 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        {  " Any " sv ,  {  {  0 ,  0x10ffff  }  }  } , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        {  " Assigned " sv ,  { }  } , 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-03 09:14:06 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        {  " ASCII " sv ,  {  {  0 ,  0x7f  }  }  } , 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-31 16:21:01 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    } ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-28 20:14:42 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    Vector < Alias >  prop_aliases ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-31 16:21:01 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-08-03 17:11:19 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    PropList  script_list  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        {  " Unknown " sv ,  { }  } , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    Vector < Alias >  script_aliases ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-04 07:05:30 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    PropList  script_extensions ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 15:29:28 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-02-13 13:39:19 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    PropList  block_list  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        {  " No_Block " sv ,  { }  } , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    Vector < Alias >  block_aliases ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    Vector < BlockName >  block_display_names ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 15:29:28 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    // FIXME: We are not yet doing anything with this data. It will be needed for String.prototype.normalize.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    NormalizationProps  normalization_props ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-01-30 23:03:19 +02:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    PropList  grapheme_break_props ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    PropList  word_break_props ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    PropList  sentence_break_props ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-02-13 13:39:19 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								// Builds an identifier-style name from `entry`: every '-' and ' ' is turned into '_', and the
								// first character as well as each character directly following a '_' is ASCII-uppercased.
								static String sanitize_entry(String const& entry)
								{
								    StringBuilder builder;
								    bool capitalize_next = true;

								    for (auto original : entry) {
								        // Hyphens and spaces are both normalized to underscores before anything else.
								        auto ch = original;
								        if (ch == '-' || ch == ' ')
								            ch = '_';

								        if (capitalize_next) {
								            builder.append_code_point(to_ascii_uppercase(ch));
								        } else {
								            builder.append_code_point(ch);
								        }

								        // An underscore starts a new "word", so the character after it gets capitalized.
								        capitalize_next = (ch == '_');
								    }

								    return builder.to_string();
								}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-11-11 00:55:02 +01:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								// Converts a space-separated list of hexadecimal code points into a vector of u32 values.
								// The result of each hex conversion is unwrapped unconditionally, so malformed segments are
								// not tolerated.
								static Vector<u32> parse_code_point_list(StringView list)
								{
								    Vector<u32> result;

								    for (auto const& hex_segment : list.split_view(' ')) {
								        auto parsed = AK::StringUtils::convert_to_uint_from_hex<u32>(hex_segment);
								        result.append(parsed.value());
								    }

								    return result;
								}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-11-11 00:55:02 +01:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								// Parses either a single hexadecimal code point ("XXXX") or an inclusive range written as
								// "XXXX..YYYY". A single code point yields a range whose two bounds are equal.
								static CodePointRange parse_code_point_range(StringView list)
								{
								    auto const separator = ".."sv;

								    // The conversion result is unwrapped unconditionally, as malformed hex is not expected here.
								    auto parse_hex = [](StringView hex) {
								        return AK::StringUtils::convert_to_uint_from_hex<u32>(hex).value();
								    };

								    if (!list.contains(separator)) {
								        auto single = parse_hex(list);
								        return CodePointRange { single, single };
								    }

								    auto bounds = list.split_view(separator);
								    VERIFY(bounds.size() == 2);

								    auto first = parse_hex(bounds[0]);
								    auto last = parse_hex(bounds[1]);
								    return CodePointRange { first, last };
								}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-02-06 20:13:12 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								// Reads special casing records from `file`, one per line, and stores them in `unicode_data`.
								//
								// Each non-comment line holds 5 or 6 ';'-separated fields: the code point, its lowercase,
								// titlecase and uppercase mappings (space-separated hex lists), and an optional qualifier field
								// containing a locale, a condition, or both. While parsing, newly seen locales and conditions are
								// recorded, and the largest mapping length is tracked. Afterwards the records are sorted and
								// numbered in their sorted order.
								static ErrorOr<void> parse_special_casing(Core::Stream::BufferedFile& file, UnicodeData& unicode_data)
								{
								    Array<u8, 1024> buffer;

								    while (TRY(file.can_read_line())) {
								        auto line = TRY(file.read_line(buffer));

								        if (line.is_empty() || line.starts_with('#'))
								            continue;

								        // Drop a trailing comment, if there is one.
								        auto comment_start = line.find('#');
								        if (comment_start.has_value())
								            line = line.substring_view(0, comment_start.value());

								        auto fields = line.split_view(';', SplitBehavior::KeepEmpty);
								        VERIFY(fields.size() == 5 || fields.size() == 6);

								        SpecialCasing entry {};
								        entry.code_point = AK::StringUtils::convert_to_uint_from_hex<u32>(fields[0]).value();
								        entry.lowercase_mapping = parse_code_point_list(fields[1]);
								        entry.titlecase_mapping = parse_code_point_list(fields[2]);
								        entry.uppercase_mapping = parse_code_point_list(fields[3]);

								        auto qualifier_field = fields[4].trim_whitespace();

								        if (!qualifier_field.is_empty()) {
								            auto qualifiers = qualifier_field.split_view(' ', SplitBehavior::KeepEmpty);
								            VERIFY(qualifiers.size() == 1 || qualifiers.size() == 2);

								            if (qualifiers.size() == 1) {
								                // A lone qualifier made up solely of lowercase ASCII letters is taken to be a
								                // locale; anything else is taken to be a condition.
								                if (all_of(qualifiers[0], is_ascii_lower_alpha))
								                    entry.locale = qualifiers[0];
								                else
								                    entry.condition = qualifiers[0];
								            } else {
								                entry.locale = qualifiers[0];
								                entry.condition = qualifiers[1];
								            }

								            if (!entry.locale.is_empty()) {
								                // Store the locale with its first letter uppercased.
								                auto first_letter = to_ascii_uppercase(entry.locale[0]);
								                auto remainder = entry.locale.substring_view(1);
								                entry.locale = String::formatted("{:c}{}", first_letter, remainder);

								                if (!unicode_data.locales.contains_slow(entry.locale))
								                    unicode_data.locales.append(entry.locale);
								            }

								            // Conditions are stored without underscores.
								            entry.condition = entry.condition.replace("_"sv, ""sv, ReplaceMode::All);

								            bool const has_condition = !entry.condition.is_empty();
								            if (has_condition && !unicode_data.conditions.contains_slow(entry.condition))
								                unicode_data.conditions.append(entry.condition);
								        }

								        // Keep track of the longest mapping seen across all three case transforms.
								        auto track_mapping_size = [&](auto const& mapping) {
								            unicode_data.largest_casing_transform_size = max(unicode_data.largest_casing_transform_size, mapping.size());
								        };
								        track_mapping_size(entry.lowercase_mapping);
								        track_mapping_size(entry.titlecase_mapping);
								        track_mapping_size(entry.uppercase_mapping);

								        unicode_data.special_casing.append(move(entry));
								    }

								    // Order by code point; for equal code points, records with a locale come before records
								    // without one, and records with locales are ordered by locale.
								    quick_sort(unicode_data.special_casing, [](auto const& lhs, auto const& rhs) {
								        if (lhs.code_point != rhs.code_point)
								            return lhs.code_point < rhs.code_point;

								        bool const lhs_has_locale = !lhs.locale.is_empty();
								        bool const rhs_has_locale = !rhs.locale.is_empty();
								        if (lhs_has_locale != rhs_has_locale)
								            return lhs_has_locale;

								        return lhs.locale < rhs.locale;
								    });

								    // Number the records according to their position after sorting.
								    u32 next_index = 0;
								    for (auto& entry : unicode_data.special_casing)
								        entry.index = next_index++;

								    return {};
								}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-02-13 13:39:19 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								// Reads a property-list style data file line by line and records, for every property named
								// on a line, the code point range given at the start of that line.
								//
								// Each data line is expected to hold exactly two ';'-separated fields: a code point range and
								// a property field. Blank lines and lines starting with '#' are skipped, and anything from the
								// first '#' onwards is dropped before the line is split.
								//
								// file:                 source to read lines from.
								// prop_list:            receives the ranges, keyed by property name (entries are created on demand).
								// multi_value_property: when true, the property field is split on spaces and every piece is
								//                       treated as its own property name.
								// sanitize_property:    when true, each property name is passed through sanitize_entry() before
								//                       being used as a key.
								static ErrorOr<void> parse_prop_list(Core::Stream::BufferedFile& file, PropList& prop_list, bool multi_value_property = false, bool sanitize_property = false)
								{
								    Array<u8, 1024> buffer;

								    while (TRY(file.can_read_line())) {
								        auto line = TRY(file.read_line(buffer));

								        // Nothing to parse on blank lines or full-line comments.
								        if (line.is_empty() || line.starts_with('#'))
								            continue;

								        // Cut off a trailing comment, if there is one.
								        auto comment_start = line.find('#');
								        if (comment_start.has_value())
								            line = line.substring_view(0, *comment_start);

								        auto segments = line.split_view(';', SplitBehavior::KeepEmpty);
								        VERIFY(segments.size() == 2);

								        auto code_point_range = parse_code_point_range(segments[0].trim_whitespace());

								        // Either one name per line, or several names separated by spaces.
								        auto value_field = segments[1].trim_whitespace();
								        auto properties = multi_value_property
								            ? value_field.split_view(' ')
								            : Vector<StringView> { value_field };

								        for (auto& property : properties) {
								            if (sanitize_property) {
								                // Keep the sanitized string alive while its view is used as the lookup key.
								                auto sanitized = sanitize_entry(property).trim_whitespace();
								                prop_list.ensure(sanitized.view()).append(code_point_range);
								            } else {
								                prop_list.ensure(property.trim_whitespace()).append(code_point_range);
								            }
								        }
								    }

								    return {};
								}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-02-06 20:13:12 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								// Reads a property alias file and appends { property, alias } pairs to prop_aliases.
								//
								// The file is treated as a sequence of sections: a blank-or-comment line that ends with
								// "Properties" starts a new section, named by the text following the first two characters
								// of that line. Only lines inside the "Binary Properties" section are parsed. Such a line
								// must have two or three ';'-separated fields: the first (and the optional third) are
								// aliases, the second is the property they refer to.
								//
								// file:         source to read lines from.
								// prop_list:    set of known properties; aliases of properties not present here are dropped.
								// prop_aliases: receives the accepted { property, alias } pairs.
								static ErrorOr<void> parse_alias_list(Core::Stream::BufferedFile& file, PropList const& prop_list, Vector<Alias>& prop_aliases)
								{
								    String current_property;
								    Array<u8, 1024> buffer;

								    auto record_alias = [&](auto alias, auto property) {
								        // Note: The alias files contain lines such as "Hyphen = Hyphen", which we should just skip.
								        // FIXME: We will, eventually, need to find where missing properties are located and parse them.
								        //        Until then, aliases of properties we have not parsed are ignored as well.
								        if (alias == property || !prop_list.contains(property))
								            return;

								        prop_aliases.append({ property, alias });
								    };

								    while (TRY(file.can_read_line())) {
								        auto line = TRY(file.read_line(buffer));

								        bool const is_blank_or_comment = line.is_empty() || line.starts_with('#');

								        if (is_blank_or_comment) {
								            // Section headers look like "# <Something> Properties"; remember the part after "# ".
								            if (line.ends_with("Properties"sv))
								                current_property = line.substring_view(2);
								            continue;
								        }

								        // Note: For now, we only care about Binary Property aliases for Unicode property escapes.
								        if (current_property != "Binary Properties"sv)
								            continue;

								        auto segments = line.split_view(';', SplitBehavior::KeepEmpty);
								        VERIFY((segments.size() == 2) || (segments.size() == 3));

								        auto property = segments[1].trim_whitespace();
								        record_alias(segments[0].trim_whitespace(), property);

								        // A third field, when present, is a second alias for the same property.
								        if (segments.size() == 3)
								            record_alias(segments[2].trim_whitespace(), property);
								    }

								    return {};
								}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-02-06 20:13:12 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								// Reads a name alias file and stores selected aliases in unicode_data.
								//
								// Every non-blank, non-comment line must have exactly three ';'-separated fields: a
								// hexadecimal code point, an alias, and the reason the alias exists. What happens next
								// depends on the reason:
								//   - "abbreviation":           the alias is stored in code_point_abbreviations (overwriting any
								//                               earlier entry for that code point).
								//   - "correction" / "control": the alias is stored in code_point_display_name_aliases, but only
								//                               if that code point has no entry there yet.
								//   - anything else:            the line is ignored.
								// Aliases are interned through unicode_data.unique_strings and stored by the value it returns.
								static ErrorOr<void> parse_name_aliases(Core::Stream::BufferedFile& file, UnicodeData& unicode_data)
								{
								    Array<u8, 1024> buffer;

								    while (TRY(file.can_read_line())) {
								        auto line = TRY(file.read_line(buffer));

								        if (line.is_empty() || line.starts_with('#'))
								            continue;

								        auto segments = line.split_view(';', SplitBehavior::KeepEmpty);
								        VERIFY(segments.size() == 3);

								        // NOTE(review): the parsed code point is only dereferenced in the branches below that
								        // use it; presumably the dereference itself traps on a failed parse - confirm.
								        auto code_point = AK::StringUtils::convert_to_uint_from_hex<u32>(segments[0].trim_whitespace());
								        auto alias = segments[1].trim_whitespace();
								        auto reason = segments[2].trim_whitespace();

								        if (reason == "abbreviation"sv) {
								            auto index = unicode_data.unique_strings.ensure(alias);
								            unicode_data.code_point_abbreviations.set(*code_point, index);
								            continue;
								        }

								        if (!reason.is_one_of("correction"sv, "control"sv))
								            continue;

								        // The first display-name alias seen for a code point wins.
								        if (unicode_data.code_point_display_name_aliases.contains(*code_point))
								            continue;

								        auto index = unicode_data.unique_strings.ensure(alias);
								        unicode_data.code_point_display_name_aliases.set(*code_point, index);
								    }

								    return {};
								}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-02-13 13:39:19 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								static  ErrorOr < void >  parse_value_alias_list ( Core : : Stream : : BufferedFile &  file ,  StringView  desired_category ,  Vector < String >  const &  value_list ,  Vector < Alias > &  prop_aliases ,  bool  primary_value_is_first  =  true ,  bool  sanitize_alias  =  false ) 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-31 13:45:05 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								{ 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-06 20:13:12 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    TRY ( file . seek ( 0 ,  Core : : Stream : : SeekMode : : SetPosition ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    Array < u8 ,  1024 >  buffer ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-03 17:11:19 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-07-31 13:45:05 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    auto  append_alias  =  [ & ] ( auto  alias ,  auto  value )  { 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-03 17:11:19 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        // Note: The value alias file contains lines such as "Ahom = Ahom", which we should just skip.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        if  ( alias  = =  value ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            return ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-07-31 13:45:05 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        // FIXME: We will, eventually, need to find where missing properties are located and parse them.
 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 07:17:24 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        if  ( ! value_list . contains_slow ( value ) ) 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-31 13:45:05 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								            return ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        prop_aliases . append ( {  value ,  alias  } ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-02-06 20:13:12 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    while  ( TRY ( file . can_read_line ( ) ) )  { 
							 
						 
					
						
							
								
									
										
										
										
											2022-04-15 14:52:33 +01:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        auto  line  =  TRY ( file . read_line ( buffer ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-06 20:13:12 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-07-31 13:45:05 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        if  ( line . is_empty ( )  | |  line . starts_with ( ' # ' ) ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            continue ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        if  ( auto  index  =  line . find ( ' # ' ) ;  index . has_value ( ) ) 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-06 20:13:12 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								            line  =  line . substring_view ( 0 ,  * index ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-31 13:45:05 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-10-22 15:38:21 +02:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        auto  segments  =  line . split_view ( ' ; ' ,  SplitBehavior : : KeepEmpty ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-31 13:45:05 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        auto  category  =  segments [ 0 ] . trim_whitespace ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        if  ( category  ! =  desired_category ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            continue ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        VERIFY ( ( segments . size ( )  = =  3 )  | |  ( segments . size ( )  = =  4 ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-03 17:11:19 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        auto  value  =  primary_value_is_first  ?  segments [ 1 ] . trim_whitespace ( )  :  segments [ 2 ] . trim_whitespace ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        auto  alias  =  primary_value_is_first  ?  segments [ 2 ] . trim_whitespace ( )  :  segments [ 1 ] . trim_whitespace ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-13 13:39:19 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        append_alias ( sanitize_alias  ?  sanitize_entry ( alias ) . view ( )  :  alias ,  value ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-31 13:45:05 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        if  ( segments . size ( )  = =  4 )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            alias  =  segments [ 3 ] . trim_whitespace ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-13 13:39:19 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								            append_alias ( sanitize_alias  ?  sanitize_entry ( alias ) . view ( )  :  alias ,  value ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-31 13:45:05 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    } 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-06 20:13:12 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    return  { } ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-31 13:45:05 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-02-06 20:13:12 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								// Parses normalization property data from `file` into `unicode_data`.
								//
								// Each non-empty line that does not start with '#' has any trailing '#' comment removed and is
								// then split on ';' into two or three fields:
								//   1. a code point range,
								//   2. the property name,
								//   3. (optional) either a quick-check marker ("N" or "M") or a list of code points.
								// A line without a third field keeps the default QuickCheck::Yes and an empty value list.
								//
								// For every parsed line, one entry is appended to unicode_data.normalization_props[property]
								// and the range is also appended to unicode_data.prop_list[property]. Errors reported while
								// reading from `file` are propagated to the caller through TRY; a line with any other number
								// of fields trips the VERIFY below.
								static ErrorOr<void> parse_normalization_props(Core::Stream::BufferedFile& file, UnicodeData& unicode_data)
								{
								    // Scratch storage handed to read_line() for each line.
								    Array<u8, 1024> buffer;

								    while (TRY(file.can_read_line())) {
								        auto line = TRY(file.read_line(buffer));

								        // Skip blank lines and lines that are entirely a comment.
								        if (line.is_empty() || line.starts_with('#'))
								            continue;

								        // Drop a trailing comment, keeping only the data in front of it.
								        if (auto index = line.find('#'); index.has_value())
								            line = line.substring_view(0, *index);

								        auto segments = line.split_view(';', SplitBehavior::KeepEmpty);
								        VERIFY((segments.size() == 2) || (segments.size() == 3));

								        auto code_point_range = parse_code_point_range(segments[0].trim_whitespace());
								        auto property = segments[1].trim_whitespace().to_string();

								        Vector<u32> value;
								        QuickCheck quick_check = QuickCheck::Yes;

								        if (segments.size() == 3) {
								            // The third field is overloaded: "N" and "M" are quick-check markers, anything
								            // else is handed to parse_code_point_list().
								            auto value_or_quick_check = segments[2].trim_whitespace();

								            if (value_or_quick_check == "N"sv)
								                quick_check = QuickCheck::No;
								            else if (value_or_quick_check == "M"sv)
								                quick_check = QuickCheck::Maybe;
								            else
								                value = parse_code_point_list(value_or_quick_check);
								        }

								        auto& normalizations = unicode_data.normalization_props.ensure(property);
								        normalizations.append({ code_point_range, move(value), quick_check });

								        // The range is recorded a second time under the same property name in prop_list.
								        auto& prop_list = unicode_data.prop_list.ensure(property);
								        prop_list.append(move(code_point_range));
								    }

								    return {};
								}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-11-23 08:24:13 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								// Records the display name for `range` in unicode_data.code_point_display_names.
								//
								// The name is chosen as follows, first match wins:
								//   1. If range.first is the first code point of one of the ideographic replacement ranges
								//      below, the whole replacement range is recorded with its "{:X}" format-string name.
								//   2. If range.first merely falls inside one of those replacement ranges, nothing is recorded,
								//      because step 1 already covers (or will cover) that code point.
								//   3. If unicode_data.code_point_display_name_aliases has an entry for range.first, that alias
								//      is recorded for `range`.
								//   4. Otherwise `name` is interned in unicode_data.unique_strings and recorded for `range`.
								static void add_canonical_code_point_name(CodePointRange range, StringView name, UnicodeData& unicode_data)
								{
								    // https://www.unicode.org/versions/Unicode15.0.0/ch04.pdf#G142981
								    // FIXME: Implement the NR1 rules for Hangul syllables.

								    struct CodePointNameFormat {
								        CodePointRange code_point_range;
								        StringView name;
								    };

								    // These code point ranges are the NR2 set of name replacements defined by Table 4-8.
								    constexpr Array<CodePointNameFormat, 16> s_ideographic_replacements { {
								        { { 0x3400, 0x4DBF }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
								        { { 0x4E00, 0x9FFF }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
								        { { 0xF900, 0xFA6D }, "CJK COMPATIBILITY IDEOGRAPH-{:X}"sv },
								        { { 0xFA70, 0xFAD9 }, "CJK COMPATIBILITY IDEOGRAPH-{:X}"sv },
								        { { 0x17000, 0x187F7 }, "TANGUT IDEOGRAPH-{:X}"sv },
								        { { 0x18B00, 0x18CD5 }, "KHITAN SMALL SCRIPT CHARACTER-{:X}"sv },
								        { { 0x18D00, 0x18D08 }, "TANGUT IDEOGRAPH-{:X}"sv },
								        { { 0x1B170, 0x1B2FB }, "NUSHU CHARACTER-{:X}"sv },
								        { { 0x20000, 0x2A6DF }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
								        { { 0x2A700, 0x2B739 }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
								        { { 0x2B740, 0x2B81D }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
								        { { 0x2B820, 0x2CEA1 }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
								        { { 0x2CEB0, 0x2EBE0 }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
								        { { 0x2F800, 0x2FA1D }, "CJK COMPATIBILITY IDEOGRAPH-{:X}"sv },
								        { { 0x30000, 0x3134A }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
								        { { 0x31350, 0x323AF }, "CJK UNIFIED IDEOGRAPH-{:X}"sv },
								    } };

								    // First pass: does `range` start exactly where a replacement range starts?
								    auto it = find_if(s_ideographic_replacements.begin(), s_ideographic_replacements.end(),
								        [&](auto const& replacement) {
								            return replacement.code_point_range.first == range.first;
								        });

								    if (it != s_ideographic_replacements.end()) {
								        // Record the full replacement range (not the caller's range) under the format-string name.
								        auto index = unicode_data.unique_strings.ensure(it->name);
								        unicode_data.code_point_display_names.append({ it->code_point_range, index });
								        return;
								    }

								    // Second pass: does `range` start anywhere inside a replacement range?
								    it = find_if(s_ideographic_replacements.begin(), s_ideographic_replacements.end(),
								        [&](auto const& replacement) {
								            return (replacement.code_point_range.first <= range.first) && (range.first <= replacement.code_point_range.last);
								        });

								    if (it != s_ideographic_replacements.end()) {
								        // Drop code points that will have been captured by a range defined by the ideographic replacements.
								        return;
								    }

								    if (auto alias = unicode_data.code_point_display_name_aliases.get(range.first); alias.has_value()) {
								        // NR4 states that control code points have a null string as their name. Our implementation
								        // uses the control code's alias as its display name.
								        unicode_data.code_point_display_names.append({ range, *alias });
								        return;
								    }

								    auto index = unicode_data.unique_strings.ensure(name);
								    unicode_data.code_point_display_names.append({ range, index });
								}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-10-02 22:57:22 -03:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								// Parses one decomposition mapping field into a CodePointDecomposition.
								//
								// The field is split on single spaces. If the first piece starts with '<', it is
								// treated as a compatibility tag: it is removed from the list, stripped of its
								// surrounding '<' / '>' characters, and stored with its first character passed
								// through to_ascii_uppercase. Tags not seen before are recorded in
								// unicode_data.compatibility_tags.
								//
								// Every remaining piece is parsed as a hexadecimal u32 and appended to
								// unicode_data.decomposition_mappings. The returned value records where in that
								// list this mapping starts and how many entries it has.
								//
								// Returns an empty Optional when the field itself is empty.
								static Optional<CodePointDecomposition> parse_decomposition_mapping(StringView string, UnicodeData& unicode_data)
								{
								    if (string.is_empty())
								        return {};

								    auto pieces = string.split_view(' ');
								    VERIFY(pieces.size() > 0);

								    CodePointDecomposition decomposition;

								    if (pieces.first().starts_with('<')) {
								        // The tag is removed from the list here, so only code points remain below.
								        auto const raw_tag = pieces.take_first().trim("<>"sv);

								        decomposition.tag = String::formatted("{:c}{}", to_ascii_uppercase(raw_tag[0]), raw_tag.substring_view(1));

								        auto const already_known = unicode_data.compatibility_tags.contains_slow(decomposition.tag);
								        if (!already_known)
								            unicode_data.compatibility_tags.append(decomposition.tag);
								    }

								    // The start index must be captured before any of this mapping's values are appended.
								    decomposition.decomposition_index = unicode_data.decomposition_mappings.size();
								    decomposition.decomposition_size = pieces.size();

								    for (auto hex_value : pieces) {
								        auto const code_point = AK::StringUtils::convert_to_uint_from_hex<u32>(hex_value);
								        unicode_data.decomposition_mappings.append(code_point.value());
								    }

								    return decomposition;
								}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-02-13 13:39:19 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								// Reads `file` line by line and records a display name for each code point range.
								//
								// Empty lines and lines starting with '#' are skipped. Every other line must split
								// on ';' into exactly two segments: a code point range and a display name, each
								// trimmed of surrounding whitespace. The name is stored in
								// unicode_data.unique_strings, and the { range, string index } pair is appended to
								// unicode_data.block_display_names.
								//
								// After the last line has been consumed, the file is rewound to position 0.
								// Any error from reading or seeking is propagated to the caller.
								static ErrorOr<void> parse_block_display_names(Core::Stream::BufferedFile& file, UnicodeData& unicode_data)
								{
								    Array<u8, 1024> line_buffer;

								    for (;;) {
								        if (!TRY(file.can_read_line()))
								            break;

								        auto current_line = TRY(file.read_line(line_buffer));

								        if (current_line.is_empty())
								            continue;
								        if (current_line.starts_with('#'))
								            continue;

								        auto fields = current_line.split_view(';', SplitBehavior::KeepEmpty);
								        VERIFY(fields.size() == 2);

								        auto range = parse_code_point_range(fields[0].trim_whitespace());
								        auto block_name = fields[1].trim_whitespace();

								        auto name_index = unicode_data.unique_strings.ensure(block_name);
								        unicode_data.block_display_names.append({ range, name_index });
								    }

								    // Rewind so the file can be read again from the beginning.
								    TRY(file.seek(0, Core::Stream::SeekMode::SetPosition));

								    return {};
								}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-02-06 20:13:12 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								static  ErrorOr < void >  parse_unicode_data ( Core : : Stream : : BufferedFile &  file ,  UnicodeData &  unicode_data ) 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 10:39:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								{ 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								    Optional < u32 >  code_point_range_start ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 08:27:28 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    auto &  assigned_code_points  =  unicode_data . prop_list . find ( " Assigned " sv ) - > value ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    Optional < u32 >  assigned_code_point_range_start  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    u32  previous_code_point  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-02-06 20:13:12 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    Array < u8 ,  1024 >  buffer ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    while  ( TRY ( file . can_read_line ( ) ) )  { 
							 
						 
					
						
							
								
									
										
										
										
											2022-04-15 14:52:33 +01:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        auto  line  =  TRY ( file . read_line ( buffer ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-06 20:13:12 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								        if  ( line . is_empty ( ) ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            continue ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-10-22 15:38:21 +02:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        auto  segments  =  line . split_view ( ' ; ' ,  SplitBehavior : : KeepEmpty ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								        VERIFY ( segments . size ( )  = =  15 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        CodePointData  data  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        data . code_point  =  AK : : StringUtils : : convert_to_uint_from_hex < u32 > ( segments [ 0 ] ) . value ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-06 20:13:12 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        data . name  =  segments [ 1 ] ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								        data . canonical_combining_class  =  AK : : StringUtils : : convert_to_uint < u8 > ( segments [ 3 ] ) . value ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-06 20:13:12 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        data . bidi_class  =  segments [ 4 ] ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-10-02 22:57:22 -03:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        data . decomposition_mapping  =  parse_decomposition_mapping ( segments [ 5 ] ,  unicode_data ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								        data . numeric_value_decimal  =  AK : : StringUtils : : convert_to_int < i8 > ( segments [ 6 ] ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        data . numeric_value_digit  =  AK : : StringUtils : : convert_to_int < i8 > ( segments [ 7 ] ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        data . numeric_value_numeric  =  AK : : StringUtils : : convert_to_int < i8 > ( segments [ 8 ] ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        data . bidi_mirrored  =  segments [ 9 ]  = =  " Y " sv ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-06 20:13:12 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        data . unicode_1_name  =  segments [ 10 ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        data . iso_comment  =  segments [ 11 ] ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								        data . simple_uppercase_mapping  =  AK : : StringUtils : : convert_to_uint_from_hex < u32 > ( segments [ 12 ] ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        data . simple_lowercase_mapping  =  AK : : StringUtils : : convert_to_uint_from_hex < u32 > ( segments [ 13 ] ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        data . simple_titlecase_mapping  =  AK : : StringUtils : : convert_to_uint_from_hex < u32 > ( segments [ 14 ] ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-01-18 08:29:47 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        if  ( auto  abbreviation  =  unicode_data . code_point_abbreviations . get ( data . code_point ) ;  abbreviation . has_value ( ) ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            data . abbreviation  =  * abbreviation ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 08:27:28 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        if  ( ! assigned_code_point_range_start . has_value ( ) ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            assigned_code_point_range_start  =  data . code_point ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-07-11 17:32:29 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        if  ( data . name . starts_with ( " < " sv )  & &  data . name . ends_with ( " , First> " sv ) )  { 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 08:27:28 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								            VERIFY ( ! code_point_range_start . has_value ( )  & &  assigned_code_point_range_start . has_value ( ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								            code_point_range_start  =  data . code_point ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            data . name  =  data . name . substring ( 1 ,  data . name . length ( )  -  9 ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 08:27:28 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            assigned_code_points . append ( {  * assigned_code_point_range_start ,  previous_code_point  } ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            assigned_code_point_range_start . clear ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-07-11 17:32:29 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        }  else  if  ( data . name . starts_with ( " < " sv )  & &  data . name . ends_with ( " , Last> " sv ) )  { 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-03 09:14:06 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								            VERIFY ( code_point_range_start . has_value ( ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 08:27:28 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								            CodePointRange  code_point_range  {  * code_point_range_start ,  data . code_point  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            assigned_code_points . append ( code_point_range ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 08:27:28 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								            data . name  =  data . name . substring ( 1 ,  data . name . length ( )  -  8 ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								            code_point_range_start . clear ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 08:27:28 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-11-23 08:24:13 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								            add_canonical_code_point_name ( code_point_range ,  data . name ,  unicode_data ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        }  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            add_canonical_code_point_name ( {  data . code_point ,  data . code_point  } ,  data . name ,  unicode_data ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            if  ( ( data . code_point  >  0 )  & &  ( data . code_point  -  previous_code_point )  ! =  1 )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								                VERIFY ( assigned_code_point_range_start . has_value ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								                assigned_code_points . append ( {  * assigned_code_point_range_start ,  previous_code_point  } ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								                assigned_code_point_range_start  =  data . code_point ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            } 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								        } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:52:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        bool  has_special_casing  {  false  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 10:39:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        for  ( auto  const &  casing  :  unicode_data . special_casing )  { 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:52:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								            if  ( casing . code_point  = =  data . code_point )  { 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 10:39:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								                data . special_casing_indices . append ( casing . index ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:52:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								                has_special_casing  =  true ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            } 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 10:39:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 19:14:23 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        unicode_data . code_points_with_non_zero_combining_class  + =  data . canonical_combining_class  ! =  0 ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:12:57 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        unicode_data . simple_uppercase_mapping_size  + =  data . simple_uppercase_mapping . has_value ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        unicode_data . simple_lowercase_mapping_size  + =  data . simple_lowercase_mapping . has_value ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-10-02 22:57:22 -03:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        unicode_data . code_points_with_decomposition_mapping  + =  data . decomposition_mapping . has_value ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:12:57 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:52:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        unicode_data . code_points_with_special_casing  + =  has_special_casing ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 10:39:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        unicode_data . largest_special_casing_size  =  max ( unicode_data . largest_special_casing_size ,  data . special_casing_indices . size ( ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 08:27:28 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        previous_code_point  =  data . code_point ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 15:00:05 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								        unicode_data . code_point_data . append ( move ( data ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    } 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-06 20:13:12 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    return  { } ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-02-06 20:13:12 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								static  ErrorOr < void >  generate_unicode_data_header ( Core : : Stream : : BufferedFile &  file ,  UnicodeData &  unicode_data ) 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								{ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    StringBuilder  builder ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    SourceGenerator  generator  {  builder  } ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 10:39:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    generator . set ( " casing_transform_size " ,  String : : number ( unicode_data . largest_casing_transform_size ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 08:27:28 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    auto  generate_enum  =  [ & ] ( StringView  name ,  StringView  default_ ,  Vector < String >  values ,  Vector < Alias >  aliases  =  { } )  { 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 22:21:53 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        quick_sort ( values ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-28 20:14:42 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        quick_sort ( aliases ,  [ ] ( auto &  alias1 ,  auto &  alias2 )  {  return  alias1 . alias  <  alias2 . alias ;  } ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 10:39:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 22:21:53 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        generator . set ( " name " ,  name ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-28 17:58:32 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        generator . set ( " underlying " ,  String : : formatted ( " {}UnderlyingType " ,  name ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-13 13:39:19 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        generator . set ( " type " ,  ( ( values . size ( )  +  ! default_ . is_empty ( ) )  <  256 )  ?  " u8 " sv  :  " u16 " sv ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-28 17:58:32 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 07:17:24 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        generator . append ( R " ~~~( 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-13 13:39:19 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								using  @ underlying @  =  @ type @ ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-28 17:58:32 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								enum  class  @ name @  :  @ underlying @  { ) ~ ~ ~ " ); 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 10:39:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-07-28 17:58:32 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        if  ( ! default_ . is_empty ( ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            generator . set ( " default " ,  default_ ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            generator . append ( R " ~~~( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    @ default @ , ) ~ ~ ~ " ); 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        } 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 10:39:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 22:21:53 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        for  ( auto  const &  value  :  values )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            generator . set ( " value " ,  value ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 08:27:28 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								            generator . append ( R " ~~~( 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 22:21:53 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    @ value @ , ) ~ ~ ~ " ); 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        } 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 11:48:09 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-07-28 20:14:42 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        for  ( auto  const &  alias  :  aliases )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            generator . set ( " alias " ,  alias . alias ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-11-17 08:14:56 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								            generator . set ( " value " ,  alias . name ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-28 20:14:42 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								            generator . append ( R " ~~~( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    @ alias @  =  @ value @ , ) ~ ~ ~ " ); 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 11:48:09 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        generator . append ( R " ~~~( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								} ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 22:21:53 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								) ~ ~ ~ " ); 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    } ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 17:37:47 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    generator . append ( R " ~~~( 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 22:21:53 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								# pragma once 
 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 17:37:47 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 22:21:53 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								# include  <AK/Types.h> 
 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-28 21:45:09 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								# include  <LibUnicode/Forward.h> 
 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 18:24:39 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 22:21:53 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								namespace  Unicode  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								) ~ ~ ~ " ); 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 18:24:39 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-09-02 12:16:00 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    generate_enum ( " Locale " sv ,  " None " sv ,  unicode_data . locales ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 22:21:53 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    generate_enum ( " Condition " sv ,  " None " sv ,  move ( unicode_data . conditions ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 07:17:24 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    generate_enum ( " GeneralCategory " sv ,  { } ,  unicode_data . general_categories . keys ( ) ,  unicode_data . general_category_aliases ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 08:27:28 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    generate_enum ( " Property " sv ,  { } ,  unicode_data . prop_list . keys ( ) ,  unicode_data . prop_aliases ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 07:17:24 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    generate_enum ( " Script " sv ,  { } ,  unicode_data . script_list . keys ( ) ,  unicode_data . script_aliases ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-13 13:39:19 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    generate_enum ( " Block " sv ,  { } ,  unicode_data . block_list . keys ( ) ,  unicode_data . block_aliases ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-01-30 23:03:19 +02:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    generate_enum ( " GraphemeBreakProperty " sv ,  { } ,  unicode_data . grapheme_break_props . keys ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    generate_enum ( " WordBreakProperty " sv ,  { } ,  unicode_data . word_break_props . keys ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    generate_enum ( " SentenceBreakProperty " sv ,  { } ,  unicode_data . sentence_break_props . keys ( ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-10-02 22:57:22 -03:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    generate_enum ( " CompatibilityFormattingTag " sv ,  " Canonical " sv ,  unicode_data . compatibility_tags ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 18:24:39 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    generator . append ( R " ~~~( 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 10:39:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								struct  SpecialCasing  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    u32  code_point  {  0  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    u32  lowercase_mapping [ @ casing_transform_size @ ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    u32  lowercase_mapping_size  {  0  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    u32  uppercase_mapping [ @ casing_transform_size @ ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    u32  uppercase_mapping_size  {  0  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    u32  titlecase_mapping [ @ casing_transform_size @ ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    u32  titlecase_mapping_size  {  0  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-09-02 12:16:00 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    Locale  locale  {  Locale : : None  } ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 10:39:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    Condition  condition  {  Condition : : None  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-10-02 22:57:22 -03:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								struct  CodePointDecomposition  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    u32  code_point  {  0  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    CompatibilityFormattingTag  tag  {  CompatibilityFormattingTag : : Canonical  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    Span < u32  const >  decomposition ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-09-02 12:16:00 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								Optional < Locale >  locale_from_string ( StringView  locale ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-08-02 10:47:41 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								) ~ ~ ~ " ); 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-02-06 20:13:12 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    TRY ( file . write ( generator . as_string_view ( ) . bytes ( ) ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    return  { } ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-02-06 20:13:12 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								static  ErrorOr < void >  generate_unicode_data_implementation ( Core : : Stream : : BufferedFile &  file ,  UnicodeData  const &  unicode_data ) 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								{ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    StringBuilder  builder ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    SourceGenerator  generator  {  builder  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-08-17 09:52:26 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    generator . set ( " string_index_type " sv ,  s_string_index_type ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:52:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    generator . set ( " largest_special_casing_size " ,  String : : number ( unicode_data . largest_special_casing_size ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 10:39:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    generator . set ( " special_casing_size " ,  String : : number ( unicode_data . special_casing . size ( ) ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    generator . append ( R " ~~~( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								# include  <AK/Array.h> 
 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 07:17:24 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								# include  <AK/BinarySearch.h> 
 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								# include  <AK/CharacterTypes.h> 
 
							 
						 
					
						
							
								
									
										
											 
										 
										
											
												LibUnicode: Dynamically load the generated UnicodeData symbols
The generated data for libunicodedata.so is quite large, and loading it
is a price paid by nearly every application by way of depending on
LibRegex. In order to defer this cost until an application actually uses
one of the surrounding APIs, dynamically load the generated symbols.
To be able to load the symbols dynamically, the generated methods must
have demangled names. Typically, this is accomplished with `extern "C"`
blocks. The clang toolchain complains about this here because the types
returned from the generators are strictly C++ types. So to demangle the
names, we use the asm() compiler directive to manually define a symbol
name; the caveat is that we *must* be sure the symbols are unique. As an
extra precaution, we prefix each symbol name with "unicode_". For more
details, see: https://gcc.gnu.org/onlinedocs/gcc/Asm-Labels.html
The symbol loader used in this implementation provides the additional
benefit of removing many [[maybe_unused]] attributes from the LibUnicode
methods. Internally, if ENABLE_UNICODE_DATABASE_DOWNLOAD is OFF, the
loader is able to stub out the function pointers it returns.
Note that as of this commit, LibUnicode is still directly linked against
LibUnicodeData. This commit is just a first step towards removing that.
											 
										 
										
											2021-12-15 18:42:51 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								# include  <AK/Optional.h> 
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								# include  <AK/Span.h> 
 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 09:04:47 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								# include  <AK/String.h> 
 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-28 21:45:09 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								# include  <AK/StringView.h> 
 
							 
						 
					
						
							
								
									
										
										
										
											2022-01-04 13:26:43 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								# include  <LibUnicode/CharacterTypes.h> 
 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								# include  <LibUnicode/UnicodeData.h> 
 
							 
						 
					
						
							
								
									
										
										
										
											2022-10-02 22:57:22 -03:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								# include  <LibUnicode/Normalize.h> 
 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-01-04 13:26:43 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								namespace  Unicode  { 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 10:39:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								) ~ ~ ~ " ); 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-08-17 09:52:26 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    unicode_data . unique_strings . generate ( generator ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 17:37:47 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    auto  append_list_and_size  =  [ & ] ( auto  const &  list ,  StringView  format )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        if  ( list . is_empty ( ) )  { 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 10:39:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								            generator . append ( " , {}, 0 " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            return ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        bool  first  =  true ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        generator . append ( " , { " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 17:37:47 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        for  ( auto  const &  item  :  list )  { 
							 
						 
					
						
							
								
									
										
										
										
											2022-07-11 17:32:29 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								            generator . append ( first  ?  "   " sv  :  " ,  " sv ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 17:37:47 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								            generator . append ( String : : formatted ( format ,  item ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 10:39:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								            first  =  false ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        } 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 17:37:47 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        generator . append ( String : : formatted ( "  }}, {} " ,  list . size ( ) ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 10:39:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    generator . append ( R " ~~~( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								static  constexpr  Array < SpecialCasing ,  @ special_casing_size @ >  s_special_casing  {  { ) ~ ~ ~ " ); 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    for  ( auto  const &  casing  :  unicode_data . special_casing )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        generator . set ( " code_point " ,  String : : formatted ( " {:#x} " ,  casing . code_point ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        generator . append ( R " ~~~( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    {  @ code_point @ ) ~ ~ ~ " ); 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        constexpr  auto  format  =  " 0x{:x} " sv ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 17:37:47 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        append_list_and_size ( casing . lowercase_mapping ,  format ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        append_list_and_size ( casing . uppercase_mapping ,  format ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        append_list_and_size ( casing . titlecase_mapping ,  format ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 10:39:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        generator . set ( " locale " ,  casing . locale . is_empty ( )  ?  " None "  :  casing . locale ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-09-02 12:16:00 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        generator . append ( " , Locale::@locale@ " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 10:39:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        generator . set ( " condition " ,  casing . condition . is_empty ( )  ?  " None "  :  casing . condition ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        generator . append ( " , Condition::@condition@ " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        generator . append ( "  }, " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    generator . append ( R " ~~~( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								}  } ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:12:57 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								struct  CodePointMapping  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    u32  code_point  {  0  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    u32  mapping  {  0  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:52:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								struct  SpecialCaseMapping  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    u32  code_point  {  0  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    Array < SpecialCasing  const * ,  @ largest_special_casing_size @ >  special_casing  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    u32  special_casing_size  {  0  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-01-18 08:29:47 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								struct  CodePointAbbreviation  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    u32  code_point  {  0  } ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-08-17 09:52:26 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    @ string_index_type @  abbreviation  {  0  } ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-01-18 08:29:47 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:52:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								template < typename  MappingType > 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:12:57 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								struct  CodePointComparator  { 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:52:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    constexpr  int  operator ( ) ( u32  code_point ,  MappingType  const &  mapping ) 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:12:57 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        return  code_point  -  mapping . code_point ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								} ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-08-17 09:52:26 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								struct  CodePointRangeComparator  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    constexpr  int  operator ( ) ( u32  code_point ,  CodePointRange  const &  range ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        return  ( code_point  >  range . last )  -  ( code_point  <  range . first ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								struct  BlockNameData  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    CodePointRange  code_point_range  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    @ string_index_type @  display_name  {  0  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								struct  BlockNameComparator  :  public  CodePointRangeComparator  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    constexpr  int  operator ( ) ( u32  code_point ,  BlockNameData  const &  name ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        return  CodePointRangeComparator : : operator ( ) ( code_point ,  name . code_point_range ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								struct  CodePointName  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    CodePointRange  code_point_range  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    @ string_index_type @  display_name  {  0  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								struct  CodePointNameComparator  :  public  CodePointRangeComparator  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    constexpr  int  operator ( ) ( u32  code_point ,  CodePointName  const &  name ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        return  CodePointRangeComparator : : operator ( ) ( code_point ,  name . code_point_range ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								} ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:12:57 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								) ~ ~ ~ " ); 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-10-02 22:57:22 -03:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    generator . set ( " decomposition_mappings_size " ,  String : : number ( unicode_data . decomposition_mappings . size ( ) ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    generator . append ( " \n static constexpr Array<u32, @decomposition_mappings_size@> s_decomposition_mappings_data {  " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    generator . append ( String : : join ( " ,  " sv ,  unicode_data . decomposition_mappings ,  " {:#x} " sv ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    generator . append ( "  }; \n " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:52:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    auto  append_code_point_mappings  =  [ & ] ( StringView  name ,  StringView  mapping_type ,  u32  size ,  auto  mapping_getter )  { 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:12:57 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        generator . set ( " name " ,  name ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:52:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        generator . set ( " mapping_type " ,  mapping_type ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:12:57 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        generator . set ( " size " ,  String : : number ( size ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        generator . append ( R " ~~~( 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:52:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								static  constexpr  Array < @ mapping_type @ ,  @ size @ >  s_ @ name @ _mappings  {  { 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:12:57 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    ) ~ ~ ~ " ); 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        constexpr  size_t  max_mappings_per_row  =  20 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        size_t  mappings_in_current_row  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        for  ( auto  const &  data  :  unicode_data . code_point_data )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            auto  mapping  =  mapping_getter ( data ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:52:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-01-18 08:29:47 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								            if  constexpr  ( requires  {  mapping . has_value ( ) ;  } )  { 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:52:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								                if  ( ! mapping . has_value ( ) ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								                    continue ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            }  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								                if  ( mapping . is_empty ( ) ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								                    continue ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            } 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:12:57 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            if  ( mappings_in_current_row + +  >  0 ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								                generator . append ( "   " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            generator . set ( " code_point " ,  String : : formatted ( " {:#x} " ,  data . code_point ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:52:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								            generator . append ( " { @code_point@ " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-08-17 09:52:26 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								            if  constexpr  ( IsSame < decltype ( mapping ) ,  Optional < u32 > >  | |  IsSame < decltype ( mapping ) ,  Optional < StringIndexType > > )  { 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:52:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								                generator . set ( " mapping " ,  String : : formatted ( " {:#x} " ,  * mapping ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								                generator . append ( " , @mapping@ }, " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-10-02 22:57:22 -03:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								            }  else  if  constexpr  ( IsSame < decltype ( mapping ) ,  Optional < CodePointDecomposition > > )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								                generator . set ( " tag " ,  mapping - > tag ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								                generator . set ( " start " ,  String : : number ( mapping - > decomposition_index ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								                generator . set ( " size " ,  String : : number ( mapping - > decomposition_size ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								                generator . append ( " , CompatibilityFormattingTag::@tag@, Span<u32 const> { s_decomposition_mappings_data.data() + @start@, @size@ } }, " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:52:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								            }  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								                append_list_and_size ( data . special_casing_indices ,  " &s_special_casing[{}] " sv ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								                generator . append ( "  }, " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            } 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:12:57 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            if  ( mappings_in_current_row  = =  max_mappings_per_row )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								                mappings_in_current_row  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								                generator . append ( " \n      " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        generator . append ( R " ~~~( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								}  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								) ~ ~ ~ " ); 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 19:14:23 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    // Emit one mapping table per kind of per-code-point data. Each call passes the
								    // table name, the generated element type, the number of entries, and a callback
								    // that extracts the mapping from a code point's data.
								    append_code_point_mappings("combining_class"sv, "CodePointMapping"sv, unicode_data.code_points_with_non_zero_combining_class,
								        [](auto const& data) -> Optional<u32> {
								            // A combining class of 0 is reported as "no mapping" (empty Optional).
								            if (data.canonical_combining_class == 0)
								                return {};
								            return data.canonical_combining_class;
								        });
								    append_code_point_mappings("uppercase"sv, "CodePointMapping"sv, unicode_data.simple_uppercase_mapping_size, [](auto const& data) { return data.simple_uppercase_mapping; });
								    append_code_point_mappings("lowercase"sv, "CodePointMapping"sv, unicode_data.simple_lowercase_mapping_size, [](auto const& data) { return data.simple_lowercase_mapping; });
								    append_code_point_mappings("special_case"sv, "SpecialCaseMapping"sv, unicode_data.code_points_with_special_casing, [](auto const& data) { return data.special_casing_indices; });
								    append_code_point_mappings("abbreviation"sv, "CodePointAbbreviation"sv, unicode_data.code_point_abbreviations.size(), [](auto const& data) { return data.abbreviation; });

								    append_code_point_mappings("decomposition"sv, "CodePointDecomposition"sv, unicode_data.code_points_with_decomposition_mapping,
								        [](auto const& data) {
								            return data.decomposition_mapping;
								        });
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 07:17:24 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    auto  append_code_point_range_list  =  [ & ] ( String  name ,  Vector < CodePointRange >  const &  ranges )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        generator . set ( " name " ,  name ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        generator . set ( " size " ,  String : : number ( ranges . size ( ) ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        generator . append ( R " ~~~( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								static  constexpr  Array < CodePointRange ,  @ size @ >  @ name @  {  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    ) ~ ~ ~ " ); 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        constexpr  size_t  max_ranges_per_row  =  20 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        size_t  ranges_in_current_row  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        for  ( auto  const &  range  :  ranges )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            if  ( ranges_in_current_row + +  >  0 ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								                generator . append ( "   " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            generator . set ( " first " ,  String : : formatted ( " {:#x} " ,  range . first ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            generator . set ( " last " ,  String : : formatted ( " {:#x} " ,  range . last ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            generator . append ( " { @first@, @last@ }, " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            if  ( ranges_in_current_row  = =  max_ranges_per_row )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								                ranges_in_current_row  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								                generator . append ( " \n      " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        generator . append ( R " ~~~( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								}  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								) ~ ~ ~ " ); 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    auto  append_prop_list  =  [ & ] ( StringView  collection_name ,  StringView  property_format ,  PropList  const &  property_list )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        for  ( auto  const &  property  :  property_list )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            auto  name  =  String : : formatted ( property_format ,  property . key ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            append_code_point_range_list ( move ( name ) ,  property . value ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        auto  property_names  =  property_list . keys ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        quick_sort ( property_names ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        generator . set ( " name " ,  collection_name ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        generator . set ( " size " ,  String : : number ( property_names . size ( ) ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        generator . append ( R " ~~~( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								static  constexpr  Array < Span < CodePointRange  const > ,  @ size @ >  @ name @  {  { ) ~ ~ ~ " ); 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        for  ( auto  const &  property_name  :  property_names )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            generator . set ( " name " ,  String : : formatted ( property_format ,  property_name ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            generator . append ( R " ~~~( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    @ name @ . span ( ) , ) ~ ~ ~ " ); 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        generator . append ( R " ~~~( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								}  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								) ~ ~ ~ " ); 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    // Emit the range tables and the collection array for every property family.
								    // The second argument is the format string used to name each per-property array.
								    append_prop_list("s_general_categories"sv, "s_general_category_{}"sv, unicode_data.general_categories);
								    append_prop_list("s_properties"sv, "s_property_{}"sv, unicode_data.prop_list);
								    append_prop_list("s_scripts"sv, "s_script_{}"sv, unicode_data.script_list);
								    append_prop_list("s_script_extensions"sv, "s_script_extension_{}"sv, unicode_data.script_extensions);
								    append_prop_list("s_blocks"sv, "s_block_{}"sv, unicode_data.block_list);
								    append_prop_list("s_grapheme_break_properties"sv, "s_grapheme_break_property_{}"sv, unicode_data.grapheme_break_props);
								    append_prop_list("s_word_break_properties"sv, "s_word_break_property_{}"sv, unicode_data.word_break_props);
								    append_prop_list("s_sentence_break_properties"sv, "s_sentence_break_property_{}"sv, unicode_data.sentence_break_props);
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 07:17:24 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-08-17 09:52:26 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    auto  append_code_point_display_names  =  [ & ] ( StringView  type ,  StringView  name ,  auto  const &  display_names )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        constexpr  size_t  max_values_per_row  =  30 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        size_t  values_in_current_row  =  0 ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-13 13:39:19 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-08-17 09:52:26 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        generator . set ( " type " ,  type ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        generator . set ( " name " ,  name ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        generator . set ( " size " ,  String : : number ( display_names . size ( ) ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        generator . append ( R " ~~~( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								static  constexpr  Array < @ type @ ,  @ size @ >  @ name @  {  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    ) ~ ~ ~ " ); 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        for  ( auto  const &  display_name  :  display_names )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            if  ( values_in_current_row + +  >  0 ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								                generator . append ( " ,  " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            generator . set ( " first " ,  String : : formatted ( " {:#x} " ,  display_name . code_point_range . first ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            generator . set ( " last " ,  String : : formatted ( " {:#x} " ,  display_name . code_point_range . last ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            generator . set ( " name " ,  String : : number ( display_name . name ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            generator . append ( " { { @first@, @last@ }, @name@ } " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            if  ( values_in_current_row  = =  max_values_per_row )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								                values_in_current_row  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								                generator . append ( " , \n      " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        generator . append ( R " ~~~( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								}  } ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-13 13:39:19 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								) ~ ~ ~ " ); 
							 
						 
					
						
							
								
									
										
										
										
											2022-08-17 09:52:26 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    // Emit the block-name and code-point-name display tables.
								    append_code_point_display_names("BlockNameData"sv, "s_block_display_names"sv, unicode_data.block_display_names);
								    append_code_point_display_names("CodePointName"sv, "s_code_point_display_names"sv, unicode_data.code_point_display_names);
							 
						 
					
						
							
								
									
										
										
										
											2022-02-13 13:39:19 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    generator . append ( R " ~~~( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								Optional < StringView >  code_point_block_display_name ( u32  code_point ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								{ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    if  ( auto  const *  entry  =  binary_search ( s_block_display_names ,  code_point ,  nullptr ,  BlockNameComparator  { } ) ) 
							 
						 
					
						
							
								
									
										
										
										
											2022-08-17 09:52:26 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        return  decode_string ( entry - > display_name ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-13 13:39:19 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    return  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								Span < BlockName  const >  block_display_names ( ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								{ 
							 
						 
					
						
							
								
									
										
										
										
											2022-08-17 09:52:26 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    static  auto  display_names  =  [ ] ( )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        Array < BlockName ,  s_block_display_names . size ( ) >  display_names ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-13 13:39:19 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-08-17 09:52:26 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        for  ( size_t  i  =  0 ;  i  <  s_block_display_names . size ( ) ;  + + i )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            auto  const &  display_name  =  s_block_display_names [ i ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            display_names [ i ]  =  {  display_name . code_point_range ,  decode_string ( display_name . display_name )  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        } 
							 
						 
					
						
							
								
									
										
										
										
											2021-11-23 08:24:13 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-08-17 09:52:26 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        return  display_names ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    } ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-11-18 23:23:58 +01:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-08-17 09:52:26 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    return  display_names . span ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								} 
							 
						 
					
						
							
								
									
										
										
										
											2021-11-18 23:23:58 +01:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-11-23 08:24:13 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								Optional < String >  code_point_display_name ( u32  code_point ) 
							 
						 
					
						
							
								
									
										
										
										
											2021-11-18 23:23:58 +01:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								{ 
							 
						 
					
						
							
								
									
										
										
										
											2021-11-23 08:24:13 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    if  ( auto  const *  entry  =  binary_search ( s_code_point_display_names ,  code_point ,  nullptr ,  CodePointNameComparator  { } ) )  { 
							 
						 
					
						
							
								
									
										
										
										
											2022-08-17 09:52:26 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        auto  display_name  =  decode_string ( entry - > display_name ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-11-23 08:24:13 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-08-17 09:52:26 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        if  ( display_name . ends_with ( " {:X} " sv ) ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            return  String : : formatted ( display_name ,  code_point ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        return  display_name ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-11-23 08:24:13 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    return  { } ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-11-18 23:23:58 +01:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								} 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 07:17:24 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								) ~ ~ ~ " ); 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 19:14:23 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    // Emits one generated lookup function of the form `u32 <method>(u32 code_point)`.
								    // The emitted function binary-searches the table named by `mappings` (using
								    // CodePointComparator<CodePointMapping>) and returns the matching entry's `mapping`
								    // member, or the expression given in `fallback` when the search yields nullptr.
								    //
								    //   method:   name of the function to emit.
								    //   mappings: name of the table the emitted function searches.
								    //   fallback: source text of the expression returned on a miss.
								    //
								    // NOTE(review): presumably `@key@` placeholders in the appended template are replaced
								    // with the values registered through generator.set() - confirm against SourceGenerator.
								    auto append_code_point_mapping_search = [&](StringView method, StringView mappings, StringView fallback) {
								        generator.set("method", method);
								        generator.set("mappings", mappings);
								        generator.set("fallback", fallback);
								        generator.append(R"~~~(
								u32 @method@(u32 code_point)
								{
								    auto const* mapping = binary_search(@mappings@, code_point, nullptr, CodePointComparator<CodePointMapping> {});
								    return mapping ? mapping->mapping : @fallback@;
								}
								)~~~");
								    };
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 19:14:23 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    // Emit the three u32 -> u32 lookup functions. Arguments are: emitted function name,
								    // table to search, and the expression the emitted function returns when nothing matches
								    // ("0" for the combining class, the unchanged code point for the case mappings).
								    append_code_point_mapping_search("canonical_combining_class"sv, "s_combining_class_mappings"sv, "0"sv);
								    append_code_point_mapping_search("to_unicode_uppercase"sv, "s_uppercase_mappings"sv, "code_point"sv);
								    append_code_point_mapping_search("to_unicode_lowercase"sv, "s_lowercase_mappings"sv, "code_point"sv);
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:12:57 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-10-09 18:52:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    // Emit four fixed lookup functions into the generated file:
								    //   - special_case_mapping():      the first `special_casing_size` entries of the matching
								    //                                  entry's `special_casing` array, or an empty span on a miss.
								    //   - code_point_abbreviation():   the decoded abbreviation, or an empty Optional when there is
								    //                                  no entry or its `abbreviation` field is 0.
								    //   - code_point_decomposition():  a reference to the matching decomposition entry, or an
								    //                                  empty Optional on a miss.
								    //   - code_point_decompositions(): the whole s_decomposition_mappings table.
								    // No comments are placed inside the raw string because its text is emitted verbatim.
								    generator.append(R"~~~(
								Span<SpecialCasing const* const> special_case_mapping(u32 code_point)
								{
								    auto const* mapping = binary_search(s_special_case_mappings, code_point, nullptr, CodePointComparator<SpecialCaseMapping> {});
								    if (mapping == nullptr)
								        return {};

								    return mapping->special_casing.span().slice(0, mapping->special_casing_size);
								}

								Optional<StringView> code_point_abbreviation(u32 code_point)
								{
								    auto const* mapping = binary_search(s_abbreviation_mappings, code_point, nullptr, CodePointComparator<CodePointAbbreviation> {});
								    if (mapping == nullptr)
								        return {};
								    if (mapping->abbreviation == 0)
								        return {};

								    return decode_string(mapping->abbreviation);
								}

								Optional<CodePointDecomposition const&> code_point_decomposition(u32 code_point)
								{
								    auto const* mapping = binary_search(s_decomposition_mappings, code_point, nullptr, CodePointComparator<CodePointDecomposition> {});
								    if (mapping == nullptr)
								        return {};
								    return *mapping;
								}

								Span<CodePointDecomposition const> code_point_decompositions()
								{
								    return s_decomposition_mappings;
								}
								)~~~");
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 07:17:24 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    // Emits one generated predicate `bool code_point_has_<enum_snake>(u32, <enum_title>)`.
								    // The emitted function casts the enum argument to `<enum_title>UnderlyingType`, uses that
								    // value to pick a range list out of `collection_name` via .at(), and reports whether a
								    // binary search of that list (CodePointRangeComparator) finds the code point.
								    //
								    //   enum_title:      type name of the enum parameter of the emitted function.
								    //   enum_snake:      suffix of the emitted function name; also its enum parameter name.
								    //   collection_name: name of the table indexed by the enum value.
								    //
								    // NOTE(review): presumably `@key@` placeholders in the appended template are replaced
								    // with the values registered through generator.set() - confirm against SourceGenerator.
								    auto append_prop_search = [&](StringView enum_title, StringView enum_snake, StringView collection_name) {
								        generator.set("enum_title", enum_title);
								        generator.set("enum_snake", enum_snake);
								        generator.set("collection_name", collection_name);
								        generator.append(R"~~~(
								bool code_point_has_@enum_snake@(u32 code_point, @enum_title@ @enum_snake@)
								{
								    auto index = static_cast<@enum_title@UnderlyingType>(@enum_snake@);
								    auto const& ranges = @collection_name@.at(index);

								    auto const* range = binary_search(ranges, code_point, nullptr, CodePointRangeComparator {});
								    return range != nullptr;
								}
								)~~~");
								    };
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-09-02 12:16:00 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    auto  append_from_string  =  [ & ] ( StringView  enum_title ,  StringView  enum_snake ,  auto  const &  prop_list ,  Vector < Alias >  const &  aliases )  { 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-12 12:22:47 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        HashValueMap < StringView >  hashes ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        hashes . ensure_capacity ( prop_list . size ( )  +  aliases . size ( ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-28 21:45:09 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-09-02 12:16:00 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        ValueFromStringOptions  options  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        for  ( auto  const &  prop  :  prop_list )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            if  constexpr  ( IsSame < RemoveCVReference < decltype ( prop ) > ,  String > )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								                hashes . set ( CaseInsensitiveStringViewTraits : : hash ( prop ) ,  prop ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								                options . sensitivity  =  CaseSensitivity : : CaseInsensitive ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            }  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								                hashes . set ( prop . key . hash ( ) ,  prop . key ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								            } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								        } 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 09:04:47 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        for  ( auto  const &  alias  :  aliases ) 
							 
						 
					
						
							
								
									
										
										
										
											2021-10-12 12:22:47 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								            hashes . set ( alias . alias . hash ( ) ,  alias . alias ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 07:17:24 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-09-02 12:16:00 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								        generate_value_from_string ( generator ,  " {}_from_string " sv ,  enum_title ,  enum_snake ,  move ( hashes ) ,  options ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 09:04:47 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    } ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 07:17:24 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-09-02 12:16:00 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    // Emit the per-enum generated functions. append_prop_search() produces a
								    // code_point_has_<name>() predicate over the named table; append_from_string() produces
								    // the matching <name>_from_string() lookup from the parsed names and their aliases.
								    // Locale gets only a from-string lookup and is passed an empty alias list.
								    append_from_string("Locale"sv, "locale"sv, unicode_data.locales, {});

								    append_prop_search("GeneralCategory"sv, "general_category"sv, "s_general_categories"sv);
								    append_from_string("GeneralCategory"sv, "general_category"sv, unicode_data.general_categories, unicode_data.general_category_aliases);

								    append_prop_search("Property"sv, "property"sv, "s_properties"sv);
								    append_from_string("Property"sv, "property"sv, unicode_data.prop_list, unicode_data.prop_aliases);

								    // Script and script-extension predicates share the Script enum type.
								    append_prop_search("Script"sv, "script"sv, "s_scripts"sv);
								    append_prop_search("Script"sv, "script_extension"sv, "s_script_extensions"sv);
								    append_from_string("Script"sv, "script"sv, unicode_data.script_list, unicode_data.script_aliases);

								    append_prop_search("Block"sv, "block"sv, "s_blocks"sv);
								    append_from_string("Block"sv, "block"sv, unicode_data.block_list, unicode_data.block_aliases);

								    // The break properties get predicates only; no from-string lookup is emitted for them here.
								    append_prop_search("GraphemeBreakProperty"sv, "grapheme_break_property"sv, "s_grapheme_break_properties"sv);
								    append_prop_search("WordBreakProperty"sv, "word_break_property"sv, "s_word_break_properties"sv);
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    append_prop_search ( " SentenceBreakProperty " sv ,  " sentence_break_property " sv ,  " s_sentence_break_properties " sv ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-08-03 17:11:19 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    generator . append ( R " ~~~( 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-28 21:45:09 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								} 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-02 10:47:41 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								) ~ ~ ~ " ); 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-02-06 20:13:12 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    TRY ( file . write ( generator . as_string_view ( ) . bytes ( ) ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    return  { } ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 15:00:05 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								// Expands a list of inclusive code point ranges into the individual code points they cover.
								// Code points are emitted range by range, in the order the ranges appear in the input; nothing
								// is sorted or de-duplicated here. A range whose `last` is below its `first` contributes nothing.
								static Vector<u32> flatten_code_point_ranges(Vector<CodePointRange> const& code_points)
								{
								    // Count the output first so that storage is reserved once for the whole result, rather than
								    // hinting a per-range span that ignores the elements already appended.
								    // NOTE(review): assumes ensure_capacity() takes an absolute element count - confirm against AK::Vector.
								    size_t total_code_points = 0;
								    for (auto const& range : code_points) {
								        if (range.first <= range.last)
								            total_code_points += static_cast<size_t>(range.last - range.first) + 1;
								    }

								    Vector<u32> flattened;
								    flattened.ensure_capacity(total_code_points);

								    for (auto const& range : code_points) {
								        for (u32 code_point = range.first; code_point <= range.last; ++code_point)
								            flattened.append(code_point);
								    }

								    return flattened;
								}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								// Collapses a list of code points into inclusive { first, last } ranges. The list is walked in the
								// order given: a value exactly one greater than the previous value extends the current range, and
								// any other value closes the current range and opens a new one. An empty list yields no ranges.
								static Vector<CodePointRange> form_code_point_ranges(Vector<u32> code_points)
								{
								    Vector<CodePointRange> ranges;

								    // The first element seeds the initial range, so there must be one to read.
								    if (code_points.is_empty())
								        return ranges;

								    u32 range_start = code_points[0];
								    u32 range_end = range_start;

								    for (size_t i = 1; i < code_points.size(); ++i) {
								        u32 code_point = code_points[i];

								        if ((code_point - range_end) == 1) {
								            range_end = code_point;
								        } else {
								            ranges.append({ range_start, range_end });
								            range_start = code_point;
								            range_end = code_point;
								        }
								    }

								    // The range still being built when the input runs out has not been emitted yet.
								    ranges.append({ range_start, range_end });
								    return ranges;
								}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								// Normalizes a list of inclusive code point ranges in place: the ranges are sorted by their first
								// code point, and any ranges that overlap or directly touch (next.first == previous.last + 1) are
								// fused into one. An empty list is left untouched.
								static void sort_and_merge_code_point_ranges(Vector<CodePointRange>& code_points)
								{
								    // Nothing to do, and the merge below relies on there being a first range to start from.
								    if (code_points.is_empty())
								        return;

								    quick_sort(code_points, [](auto const& range1, auto const& range2) {
								        return range1.first < range2.first;
								    });

								    Vector<CodePointRange> merged;
								    merged.ensure_capacity(code_points.size());

								    for (auto const& range : code_points) {
								        if (!merged.is_empty()) {
								            auto& previous = merged.last();

								            // Because the input is sorted by `first`, a range can only interact with the most recently
								            // emitted one. The subtraction is evaluated only when range.first > previous.last, so it
								            // cannot wrap around.
								            bool overlaps = range.first <= previous.last;
								            bool is_adjacent = !overlaps && ((range.first - previous.last) == 1);

								            if (overlaps || is_adjacent) {
								                previous.last = max(previous.last, range.last);
								                continue;
								            }
								        }

								        merged.append(range);
								    }

								    code_points = move(merged);
								}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 07:17:24 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								// Adds the General Category values that the Unicode standard defines only as unions of other
								// values (they do not appear in any UCD file) to the given property list.
								// https://www.unicode.org/reports/tr44/#GC_Values_Table
								static void populate_general_category_unions(PropList& general_categories)
								{
								    // Builds the entry for `alias` by concatenating the ranges of every listed member category and
								    // then sorting and merging the combined list. Each member is looked up with find() and
								    // dereferenced directly, so it must already be present in the list.
								    auto populate_union = [&general_categories](auto alias, auto categories) {
								        auto& union_ranges = general_categories.ensure(alias);

								        for (auto const& member : categories) {
								            auto member_entry = general_categories.find(member);
								            union_ranges.extend(member_entry->value);
								        }

								        sort_and_merge_code_point_ranges(union_ranges);
								    };

								    populate_union("LC"sv, Array { "Ll"sv, "Lu"sv, "Lt"sv });
								    populate_union("L"sv, Array { "Lu"sv, "Ll"sv, "Lt"sv, "Lm"sv, "Lo"sv });
								    populate_union("M"sv, Array { "Mn"sv, "Mc"sv, "Me"sv });
								    populate_union("N"sv, Array { "Nd"sv, "Nl"sv, "No"sv });
								    populate_union("P"sv, Array { "Pc"sv, "Pd"sv, "Ps"sv, "Pe"sv, "Pi"sv, "Pf"sv, "Po"sv });
								    populate_union("S"sv, Array { "Sm"sv, "Sc"sv, "Sk"sv, "So"sv });
								    populate_union("Z"sv, Array { "Zs"sv, "Zl"sv, "Zp"sv });
								    populate_union("C"sv, Array { "Cc"sv, "Cf"sv, "Cs"sv, "Co"sv, "Cn"sv });
								}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 15:00:05 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								// Rewrites `script_extensions` so that it is keyed by full Script name and always contains the
								// base Script ranges from `script_list`.
								//
								// The ScriptExtensions UCD file is laid out differently from the other files: the Script named on a
								// line may be either the full Script string or an aliased abbreviation, and the listed extensions
								// may or may not include the base Script list.
								static void normalize_script_extensions(PropList& script_extensions, PropList const& script_list, Vector<Alias> const& script_aliases)
								{
								    // Keep the entries as parsed, and restart the output from the base Script list.
								    auto parsed_extensions = move(script_extensions);
								    script_extensions = script_list;

								    for (auto const& parsed : parsed_extensions) {
								        // Translate an abbreviated key to its full Script name; a key with no matching alias is used as is.
								        auto alias_match = find_if(script_aliases.begin(), script_aliases.end(), [&](auto const& candidate) {
								            return parsed.key == candidate.alias;
								        });
								        auto const& script_name = (alias_match == script_aliases.end()) ? parsed.key : alias_match->name;

								        // The lookup result is dereferenced directly, so the name must exist in the base Script list.
								        auto& script_ranges = script_extensions.find(script_name)->value;
								        script_ranges.extend(parsed.value);

								        sort_and_merge_code_point_ranges(script_ranges);
								    }

								    // Lastly, the Common and Inherited script extensions are special. They must not contain any
								    // code points which appear in other script extensions. The ScriptExtensions UCD file does not
								    // list these extensions, therefore this peculiarity must be handled programmatically.
								    // https://www.unicode.org/reports/tr24/#Assignment_ScriptX_Values
								    auto appears_in_other_extension = [&](StringView script, u32 code_point) {
								        auto covers_code_point = [&](auto const& range) {
								            return (range.first <= code_point) && (code_point <= range.last);
								        };

								        for (auto const& parsed : parsed_extensions) {
								            // Entries parsed under the script's own key do not count as "other".
								            if (parsed.key == script)
								                continue;

								            if (any_of(parsed.value, covers_code_point))
								                return true;
								        }

								        return false;
								    };

								    // Base code points of `script`, minus those that some other parsed extension entry covers.
								    auto exclusive_code_points_of = [&](StringView script) {
								        auto remaining = flatten_code_point_ranges(script_list.find(script)->value);
								        remaining.remove_all_matching([&](u32 code_point) {
								            return appears_in_other_extension(script, code_point);
								        });
								        return remaining;
								    };

								    for (auto special_script : Array { "Common"sv, "Inherited"sv }) {
								        auto exclusive_code_points = exclusive_code_points_of(special_script);
								        script_extensions.set(special_script, form_code_point_ranges(exclusive_code_points));
								    }
								}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-11-23 10:32:01 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								ErrorOr < int >  serenity_main ( Main : : Arguments  arguments ) 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								{ 
							 
						 
					
						
							
								
									
										
										
										
											2021-11-23 10:32:01 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    StringView  generated_header_path ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    StringView  generated_implementation_path ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    StringView  unicode_data_path ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    StringView  special_casing_path ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    StringView  derived_general_category_path ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    StringView  prop_list_path ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    StringView  derived_core_prop_path ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    StringView  derived_binary_prop_path ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    StringView  prop_alias_path ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    StringView  prop_value_alias_path ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    StringView  name_alias_path ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    StringView  scripts_path ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    StringView  script_extensions_path ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-13 13:39:19 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    StringView  blocks_path ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-11-23 10:32:01 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    StringView  emoji_data_path ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    StringView  normalization_path ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-01-30 23:03:19 +02:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    StringView  grapheme_break_path ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    StringView  word_break_path ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    StringView  sentence_break_path ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    Core : : ArgsParser  args_parser ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-02 10:47:41 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    args_parser . add_option ( generated_header_path ,  " Path to the Unicode Data header file to generate " ,  " generated-header-path " ,  ' h ' ,  " generated-header-path " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    args_parser . add_option ( generated_implementation_path ,  " Path to the Unicode Data implementation file to generate " ,  " generated-implementation-path " ,  ' c ' ,  " generated-implementation-path " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								    args_parser . add_option ( unicode_data_path ,  " Path to UnicodeData.txt file " ,  " unicode-data-path " ,  ' u ' ,  " unicode-data-path " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 10:39:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    args_parser . add_option ( special_casing_path ,  " Path to SpecialCasing.txt file " ,  " special-casing-path " ,  ' s ' ,  " special-casing-path " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 07:17:24 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    args_parser . add_option ( derived_general_category_path ,  " Path to DerivedGeneralCategory.txt file " ,  " derived-general-category-path " ,  ' g ' ,  " derived-general-category-path " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 17:37:47 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    args_parser . add_option ( prop_list_path ,  " Path to PropList.txt file " ,  " prop-list-path " ,  ' p ' ,  " prop-list-path " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-28 18:39:41 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    args_parser . add_option ( derived_core_prop_path ,  " Path to DerivedCoreProperties.txt file " ,  " derived-core-prop-path " ,  ' d ' ,  " derived-core-prop-path " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-04 07:53:24 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    args_parser . add_option ( derived_binary_prop_path ,  " Path to DerivedBinaryProperties.txt file " ,  " derived-binary-prop-path " ,  ' b ' ,  " derived-binary-prop-path " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-28 20:14:42 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    args_parser . add_option ( prop_alias_path ,  " Path to PropertyAliases.txt file " ,  " prop-alias-path " ,  ' a ' ,  " prop-alias-path " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-31 13:45:05 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    args_parser . add_option ( prop_value_alias_path ,  " Path to PropertyValueAliases.txt file " ,  " prop-value-alias-path " ,  ' v ' ,  " prop-value-alias-path " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-11-18 23:23:58 +01:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    args_parser . add_option ( name_alias_path ,  " Path to NameAliases.txt file " ,  " name-alias-path " ,  ' m ' ,  " name-alias-path " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-03 17:11:19 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    args_parser . add_option ( scripts_path ,  " Path to Scripts.txt file " ,  " scripts-path " ,  ' r ' ,  " scripts-path " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-04 07:05:30 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    args_parser . add_option ( script_extensions_path ,  " Path to ScriptExtensions.txt file " ,  " script-extensions-path " ,  ' x ' ,  " script-extensions-path " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-13 13:39:19 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    args_parser . add_option ( blocks_path ,  " Path to Blocks.txt file " ,  " blocks-path " ,  ' k ' ,  " blocks-path " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-04 07:46:36 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    args_parser . add_option ( emoji_data_path ,  " Path to emoji-data.txt file " ,  " emoji-data-path " ,  ' e ' ,  " emoji-data-path " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 15:29:28 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    args_parser . add_option ( normalization_path ,  " Path to DerivedNormalizationProps.txt file " ,  " normalization-path " ,  ' n ' ,  " normalization-path " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-01-30 23:03:19 +02:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    args_parser . add_option ( grapheme_break_path ,  " Path to GraphemeBreakProperty.txt file " ,  " grapheme-break-path " ,  ' f ' ,  " grapheme-break-path " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    args_parser . add_option ( word_break_path ,  " Path to WordBreakProperty.txt file " ,  " word-break-path " ,  ' w ' ,  " word-break-path " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    args_parser . add_option ( sentence_break_path ,  " Path to SentenceBreakProperty.txt file " ,  " sentence-break-path " ,  ' i ' ,  " sentence-break-path " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-11-23 10:32:01 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    args_parser . parse ( arguments ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-02-06 20:13:12 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    auto  generated_header_file  =  TRY ( open_file ( generated_header_path ,  Core : : Stream : : OpenMode : : Write ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    auto  generated_implementation_file  =  TRY ( open_file ( generated_implementation_path ,  Core : : Stream : : OpenMode : : Write ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    auto  unicode_data_file  =  TRY ( open_file ( unicode_data_path ,  Core : : Stream : : OpenMode : : Read ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    auto  derived_general_category_file  =  TRY ( open_file ( derived_general_category_path ,  Core : : Stream : : OpenMode : : Read ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    auto  special_casing_file  =  TRY ( open_file ( special_casing_path ,  Core : : Stream : : OpenMode : : Read ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    auto  prop_list_file  =  TRY ( open_file ( prop_list_path ,  Core : : Stream : : OpenMode : : Read ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    auto  derived_core_prop_file  =  TRY ( open_file ( derived_core_prop_path ,  Core : : Stream : : OpenMode : : Read ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    auto  derived_binary_prop_file  =  TRY ( open_file ( derived_binary_prop_path ,  Core : : Stream : : OpenMode : : Read ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    auto  prop_alias_file  =  TRY ( open_file ( prop_alias_path ,  Core : : Stream : : OpenMode : : Read ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    auto  prop_value_alias_file  =  TRY ( open_file ( prop_value_alias_path ,  Core : : Stream : : OpenMode : : Read ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    auto  name_alias_file  =  TRY ( open_file ( name_alias_path ,  Core : : Stream : : OpenMode : : Read ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    auto  scripts_file  =  TRY ( open_file ( scripts_path ,  Core : : Stream : : OpenMode : : Read ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    auto  script_extensions_file  =  TRY ( open_file ( script_extensions_path ,  Core : : Stream : : OpenMode : : Read ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-13 13:39:19 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    auto  blocks_file  =  TRY ( open_file ( blocks_path ,  Core : : Stream : : OpenMode : : Read ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-06 20:13:12 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    auto  emoji_data_file  =  TRY ( open_file ( emoji_data_path ,  Core : : Stream : : OpenMode : : Read ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    auto  normalization_file  =  TRY ( open_file ( normalization_path ,  Core : : Stream : : OpenMode : : Read ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    auto  grapheme_break_file  =  TRY ( open_file ( grapheme_break_path ,  Core : : Stream : : OpenMode : : Read ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    auto  word_break_file  =  TRY ( open_file ( word_break_path ,  Core : : Stream : : OpenMode : : Read ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    auto  sentence_break_file  =  TRY ( open_file ( sentence_break_path ,  Core : : Stream : : OpenMode : : Read ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 18:24:39 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-07-27 10:39:37 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    UnicodeData  unicode_data  { } ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-06 20:13:12 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    TRY ( parse_special_casing ( * special_casing_file ,  unicode_data ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    TRY ( parse_prop_list ( * derived_general_category_file ,  unicode_data . general_categories ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    TRY ( parse_prop_list ( * prop_list_file ,  unicode_data . prop_list ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    TRY ( parse_prop_list ( * derived_core_prop_file ,  unicode_data . prop_list ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    TRY ( parse_prop_list ( * derived_binary_prop_file ,  unicode_data . prop_list ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    TRY ( parse_prop_list ( * emoji_data_file ,  unicode_data . prop_list ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    TRY ( parse_normalization_props ( * normalization_file ,  unicode_data ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    TRY ( parse_alias_list ( * prop_alias_file ,  unicode_data . prop_list ,  unicode_data . prop_aliases ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    TRY ( parse_prop_list ( * scripts_file ,  unicode_data . script_list ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    TRY ( parse_prop_list ( * script_extensions_file ,  unicode_data . script_extensions ,  true ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-13 13:39:19 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    TRY ( parse_block_display_names ( * blocks_file ,  unicode_data ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    TRY ( parse_prop_list ( * blocks_file ,  unicode_data . block_list ,  false ,  true ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-06 20:13:12 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    TRY ( parse_name_aliases ( * name_alias_file ,  unicode_data ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    TRY ( parse_prop_list ( * grapheme_break_file ,  unicode_data . grapheme_break_props ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    TRY ( parse_prop_list ( * word_break_file ,  unicode_data . word_break_props ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    TRY ( parse_prop_list ( * sentence_break_file ,  unicode_data . sentence_break_props ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-03 17:11:19 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 07:17:24 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    populate_general_category_unions ( unicode_data . general_categories ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-06 20:13:12 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    TRY ( parse_unicode_data ( * unicode_data_file ,  unicode_data ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    TRY ( parse_value_alias_list ( * prop_value_alias_file ,  " gc " sv ,  unicode_data . general_categories . keys ( ) ,  unicode_data . general_category_aliases ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    TRY ( parse_value_alias_list ( * prop_value_alias_file ,  " sc " sv ,  unicode_data . script_list . keys ( ) ,  unicode_data . script_aliases ,  false ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-13 13:39:19 -05:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    TRY ( parse_value_alias_list ( * prop_value_alias_file ,  " blk " sv ,  unicode_data . block_list . keys ( ) ,  unicode_data . block_aliases ,  false ,  true ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-10 15:00:05 -04:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    normalize_script_extensions ( unicode_data . script_extensions ,  unicode_data . script_list ,  unicode_data . script_aliases ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-02-06 20:13:12 +00:00 
										
									 
								 
							 
							
								
									
										 
									 
								
							 
							
								 
							 
							
							
								    TRY ( generate_unicode_data_header ( * generated_header_file ,  unicode_data ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    TRY ( generate_unicode_data_implementation ( * generated_implementation_file ,  unicode_data ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-25 15:10:51 -04:00 
										
									 
								 
							 
							
								
							 
							
								 
							 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								    return  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							 
							
							
								}