mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	closes gh-124016: update Unicode to 16.0.0 (#124017)
This commit is contained in:
		
							parent
							
								
									a9594a34c6
								
							
						
					
					
						commit
						bb904e063d
					
				
					 12 changed files with 23668 additions and 21778 deletions
				
			
		|  | @ -1679,7 +1679,7 @@ expression support in the :mod:`re` module). | |||
| 
 | ||||
|    The casefolding algorithm is | ||||
|    `described in section 3.13 'Default Case Folding' of the Unicode Standard | ||||
|    <https://www.unicode.org/versions/Unicode15.1.0/ch03.pdf>`__. | ||||
|    <https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-3/#G33992>`__. | ||||
| 
 | ||||
|    .. versionadded:: 3.3 | ||||
| 
 | ||||
|  | @ -1843,7 +1843,7 @@ expression support in the :mod:`re` module). | |||
|    property being one of "Lm", "Lt", "Lu", "Ll", or "Lo".  Note that this is different | ||||
|    from the `Alphabetic property defined in section 4.10 'Letters, Alphabetic, and | ||||
|    Ideographic' of the Unicode Standard | ||||
|    <https://www.unicode.org/versions/Unicode15.1.0/ch04.pdf>`_. | ||||
|    <https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-4/#G91002>`_. | ||||
| 
 | ||||
| 
 | ||||
| .. method:: str.isascii() | ||||
|  | @ -1979,7 +1979,7 @@ expression support in the :mod:`re` module). | |||
| 
 | ||||
|    The lowercasing algorithm used is | ||||
|    `described in section 3.13 'Default Case Folding' of the Unicode Standard | ||||
|    <https://www.unicode.org/versions/Unicode15.1.0/ch03.pdf>`__. | ||||
|    <https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-3/#G33992>`__. | ||||
| 
 | ||||
| 
 | ||||
| .. method:: str.lstrip([chars]) | ||||
|  | @ -2331,7 +2331,7 @@ expression support in the :mod:`re` module). | |||
| 
 | ||||
|    The uppercasing algorithm used is | ||||
|    `described in section 3.13 'Default Case Folding' of the Unicode Standard | ||||
|    <https://www.unicode.org/versions/Unicode15.1.0/ch03.pdf>`__. | ||||
|    <https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-3/#G33992>`__. | ||||
| 
 | ||||
| 
 | ||||
| .. method:: str.zfill(width) | ||||
|  |  | |||
|  | @ -17,8 +17,8 @@ | |||
| 
 | ||||
| This module provides access to the Unicode Character Database (UCD) which | ||||
| defines character properties for all Unicode characters. The data contained in | ||||
| this database is compiled from the `UCD version 15.1.0 | ||||
| <https://www.unicode.org/Public/15.1.0/ucd>`_. | ||||
| this database is compiled from the `UCD version 16.0.0 | ||||
| <https://www.unicode.org/Public/16.0.0/ucd>`_. | ||||
| 
 | ||||
| The module uses the same names and symbols as defined by Unicode | ||||
| Standard Annex #44, `"Unicode Character Database" | ||||
|  | @ -175,6 +175,6 @@ Examples: | |||
| 
 | ||||
| .. rubric:: Footnotes | ||||
| 
 | ||||
| .. [#] https://www.unicode.org/Public/15.1.0/ucd/NameAliases.txt | ||||
| .. [#] https://www.unicode.org/Public/16.0.0/ucd/NameAliases.txt | ||||
| 
 | ||||
| .. [#] https://www.unicode.org/Public/15.1.0/ucd/NamedSequences.txt | ||||
| .. [#] https://www.unicode.org/Public/16.0.0/ucd/NamedSequences.txt | ||||
|  |  | |||
|  | @ -314,7 +314,7 @@ The Unicode category codes mentioned above stand for: | |||
| * *Nd* - decimal numbers | ||||
| * *Pc* - connector punctuations | ||||
| * *Other_ID_Start* - explicit list of characters in `PropList.txt | ||||
|   <https://www.unicode.org/Public/15.1.0/ucd/PropList.txt>`_ to support backwards | ||||
|   <https://www.unicode.org/Public/16.0.0/ucd/PropList.txt>`_ to support backwards | ||||
|   compatibility | ||||
| * *Other_ID_Continue* - likewise | ||||
| 
 | ||||
|  | @ -322,8 +322,8 @@ All identifiers are converted into the normal form NFKC while parsing; compariso | |||
| of identifiers is based on NFKC. | ||||
| 
 | ||||
| A non-normative file listing all valid identifier characters for Unicode | ||||
| 15.1.0 can be found at | ||||
| https://www.unicode.org/Public/15.1.0/ucd/DerivedCoreProperties.txt | ||||
| 16.0.0 can be found at | ||||
| https://www.unicode.org/Public/16.0.0/ucd/DerivedCoreProperties.txt | ||||
| 
 | ||||
| 
 | ||||
| .. _keywords: | ||||
|  | @ -1044,4 +1044,4 @@ occurrence outside string literals and comments is an unconditional error: | |||
| 
 | ||||
| .. rubric:: Footnotes | ||||
| 
 | ||||
| .. [#] https://www.unicode.org/Public/15.1.0/ucd/NameAliases.txt | ||||
| .. [#] https://www.unicode.org/Public/16.0.0/ucd/NameAliases.txt | ||||
|  |  | |||
|  | @ -253,6 +253,11 @@ symtable | |||
| 
 | ||||
|   (Contributed by Bénédikt Tran in :gh:`120029`.) | ||||
| 
 | ||||
| unicodedata | ||||
| ----------- | ||||
| 
 | ||||
| * The Unicode database has been updated to Unicode 16.0.0. | ||||
| 
 | ||||
| .. Add improved modules above alphabetically, not here at the end. | ||||
| 
 | ||||
| Optimizations | ||||
|  |  | |||
|  | @ -1132,8 +1132,8 @@ def test_capitalize_nonascii(self): | |||
|         self.checkequal('\u2160\u2171\u2172', | ||||
|                         '\u2170\u2171\u2172', 'capitalize') | ||||
|         # check with Ll chars with no upper - nothing changes here | ||||
|         self.checkequal('\u019b\u1d00\u1d86\u0221\u1fb7', | ||||
|                         '\u019b\u1d00\u1d86\u0221\u1fb7', 'capitalize') | ||||
|         self.checkequal('\u1d00\u1d86\u0221\u1fb7', | ||||
|                         '\u1d00\u1d86\u0221\u1fb7', 'capitalize') | ||||
| 
 | ||||
|     def test_startswith(self): | ||||
|         self.checkequal(True, 'hello', 'startswith', 'he') | ||||
|  |  | |||
|  | @ -2430,8 +2430,10 @@ def __repr__(self): | |||
|         self.assertEqual(repr(s1()), '\\n') | ||||
| 
 | ||||
|     def test_printable_repr(self): | ||||
|         self.assertEqual(repr('\U00010000'), "'%c'" % (0x10000,)) # printable | ||||
|         self.assertEqual(repr('\U00014000'), "'\\U00014000'")     # nonprintable | ||||
|         # printable | ||||
|         self.assertEqual(repr('\U00010000'), "'%c'" % (0x10000,)) | ||||
|         # nonprintable (private use area) | ||||
|         self.assertEqual(repr('\U00100001'), "'\\U00100001'") | ||||
| 
 | ||||
|     # This test only affects 32-bit platforms because expandtabs can only take | ||||
|     # an int as the max value, not a 64-bit C long.  If expandtabs is changed | ||||
|  |  | |||
|  | @ -18,7 +18,7 @@ | |||
| class UnicodeMethodsTest(unittest.TestCase): | ||||
| 
 | ||||
|     # update this, if the database changes | ||||
|     expectedchecksum = '63aa77dcb36b0e1df082ee2a6071caeda7f0955e' | ||||
|     expectedchecksum = '9e43ee3929471739680c0e705482b4ae1c4122e4' | ||||
| 
 | ||||
|     @requires_resource('cpu') | ||||
|     def test_method_checksum(self): | ||||
|  | @ -71,7 +71,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest): | |||
| 
 | ||||
|     # Update this if the database changes. Make sure to do a full rebuild | ||||
|     # (e.g. 'make distclean && make') to get the correct checksum. | ||||
|     expectedchecksum = '232affd2a50ec4bd69d2482aa0291385cbdefaba' | ||||
|     expectedchecksum = '23ab09ed4abdf93db23b97359108ed630dd8311d' | ||||
| 
 | ||||
|     @requires_resource('cpu') | ||||
|     def test_function_checksum(self): | ||||
|  |  | |||
|  | @ -0,0 +1 @@ | |||
| Update :mod:`unicodedata` database to Unicode 16.0.0. | ||||
							
								
								
									
										6751
									
								
								Modules/unicodedata_db.h
									
										
									
										generated
									
									
									
								
							
							
						
						
									
										6751
									
								
								Modules/unicodedata_db.h
									
										
									
										generated
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										33546
									
								
								Modules/unicodename_db.h
									
										
									
										generated
									
									
									
								
							
							
						
						
									
										33546
									
								
								Modules/unicodename_db.h
									
										
									
										generated
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										5103
									
								
								Objects/unicodetype_db.h
									
										
									
										generated
									
									
									
								
							
							
						
						
									
										5103
									
								
								Objects/unicodetype_db.h
									
										
									
										generated
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							|  | @ -44,7 +44,7 @@ | |||
| #   * Doc/library/stdtypes.rst, and | ||||
| #   * Doc/library/unicodedata.rst | ||||
| #   * Doc/reference/lexical_analysis.rst (two occurrences) | ||||
| UNIDATA_VERSION = "15.1.0" | ||||
| UNIDATA_VERSION = "16.0.0" | ||||
| UNICODE_DATA = "UnicodeData%s.txt" | ||||
| COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt" | ||||
| EASTASIAN_WIDTH = "EastAsianWidth%s.txt" | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Benjamin Peterson
						Benjamin Peterson