mirror of
				https://github.com/python/cpython.git
				synced 2025-10-26 11:14:33 +00:00 
			
		
		
		
	#5127: Even on narrow unicode builds, the C functions that access the Unicode
Database (Py_UNICODE_TOLOWER, Py_UNICODE_ISDECIMAL, and others) now accept and return characters from the full Unicode range (Py_UCS4).
The differences visible from Python code are few:
- unicodedata.numeric(), unicodedata.decimal() and unicodedata.digit() now return the correct value for large code points;
- repr() may consider more characters as printable.
This commit is contained in:
		
							parent
							
								
									36e778ef02
								
							
						
					
					
						commit
						324ac65ceb
					
				
					 7 changed files with 69 additions and 232 deletions
				
			
		|  | @ -221,24 +221,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE; | |||
| # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString | ||||
| # define _PyUnicode_Fini _PyUnicodeUCS2_Fini | ||||
| # define _PyUnicode_Init _PyUnicodeUCS2_Init | ||||
| # define _PyUnicode_IsAlpha _PyUnicodeUCS2_IsAlpha | ||||
| # define _PyUnicode_IsDecimalDigit _PyUnicodeUCS2_IsDecimalDigit | ||||
| # define _PyUnicode_IsDigit _PyUnicodeUCS2_IsDigit | ||||
| # define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak | ||||
| # define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase | ||||
| # define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric | ||||
| # define _PyUnicode_IsPrintable _PyUnicodeUCS2_IsPrintable | ||||
| # define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase | ||||
| # define _PyUnicode_IsXidStart _PyUnicodeUCS2_IsXidStart | ||||
| # define _PyUnicode_IsXidContinue _PyUnicodeUCS2_IsXidContinue | ||||
| # define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase | ||||
| # define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace | ||||
| # define _PyUnicode_ToDecimalDigit _PyUnicodeUCS2_ToDecimalDigit | ||||
| # define _PyUnicode_ToDigit _PyUnicodeUCS2_ToDigit | ||||
| # define _PyUnicode_ToLowercase _PyUnicodeUCS2_ToLowercase | ||||
| # define _PyUnicode_ToNumeric _PyUnicodeUCS2_ToNumeric | ||||
| # define _PyUnicode_ToTitlecase _PyUnicodeUCS2_ToTitlecase | ||||
| # define _PyUnicode_ToUppercase _PyUnicodeUCS2_ToUppercase | ||||
| 
 | ||||
| #else | ||||
| 
 | ||||
|  | @ -322,24 +304,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE; | |||
| # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString | ||||
| # define _PyUnicode_Fini _PyUnicodeUCS4_Fini | ||||
| # define _PyUnicode_Init _PyUnicodeUCS4_Init | ||||
| # define _PyUnicode_IsAlpha _PyUnicodeUCS4_IsAlpha | ||||
| # define _PyUnicode_IsDecimalDigit _PyUnicodeUCS4_IsDecimalDigit | ||||
| # define _PyUnicode_IsDigit _PyUnicodeUCS4_IsDigit | ||||
| # define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak | ||||
| # define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase | ||||
| # define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric | ||||
| # define _PyUnicode_IsPrintable _PyUnicodeUCS4_IsPrintable | ||||
| # define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase | ||||
| # define _PyUnicode_IsXidStart _PyUnicodeUCS4_IsXidStart | ||||
| # define _PyUnicode_IsXidContinue _PyUnicodeUCS4_IsXidContinue | ||||
| # define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase | ||||
| # define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace | ||||
| # define _PyUnicode_ToDecimalDigit _PyUnicodeUCS4_ToDecimalDigit | ||||
| # define _PyUnicode_ToDigit _PyUnicodeUCS4_ToDigit | ||||
| # define _PyUnicode_ToLowercase _PyUnicodeUCS4_ToLowercase | ||||
| # define _PyUnicode_ToNumeric _PyUnicodeUCS4_ToNumeric | ||||
| # define _PyUnicode_ToTitlecase _PyUnicodeUCS4_ToTitlecase | ||||
| # define _PyUnicode_ToUppercase _PyUnicodeUCS4_ToUppercase | ||||
| 
 | ||||
| 
 | ||||
| #endif | ||||
|  | @ -351,7 +315,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE; | |||
|    configure Python using --with-wctype-functions.  This reduces the | ||||
|    interpreter's code size. */ | ||||
| 
 | ||||
| #if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS) | ||||
| #if defined(Py_UNICODE_WIDE) && defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS) | ||||
| 
 | ||||
| #include <wctype.h> | ||||
| 
 | ||||
|  | @ -1542,75 +1506,75 @@ PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[]; | |||
| */ | ||||
| 
 | ||||
| PyAPI_FUNC(int) _PyUnicode_IsLowercase( | ||||
|     Py_UNICODE ch       /* Unicode character */ | ||||
|     Py_UCS4 ch       /* Unicode character */ | ||||
|     ); | ||||
| 
 | ||||
| PyAPI_FUNC(int) _PyUnicode_IsUppercase( | ||||
|     Py_UNICODE ch       /* Unicode character */ | ||||
|     Py_UCS4 ch       /* Unicode character */ | ||||
|     ); | ||||
| 
 | ||||
| PyAPI_FUNC(int) _PyUnicode_IsTitlecase( | ||||
|     Py_UNICODE ch       /* Unicode character */ | ||||
|     Py_UCS4 ch       /* Unicode character */ | ||||
|     ); | ||||
| 
 | ||||
| PyAPI_FUNC(int) _PyUnicode_IsXidStart( | ||||
|     Py_UNICODE ch       /* Unicode character */ | ||||
|     Py_UCS4 ch       /* Unicode character */ | ||||
|     ); | ||||
| 
 | ||||
| PyAPI_FUNC(int) _PyUnicode_IsXidContinue( | ||||
|     Py_UNICODE ch       /* Unicode character */ | ||||
|     Py_UCS4 ch       /* Unicode character */ | ||||
|     ); | ||||
| 
 | ||||
| PyAPI_FUNC(int) _PyUnicode_IsWhitespace( | ||||
|     const Py_UNICODE ch         /* Unicode character */ | ||||
|     const Py_UCS4 ch         /* Unicode character */ | ||||
|     ); | ||||
| 
 | ||||
| PyAPI_FUNC(int) _PyUnicode_IsLinebreak( | ||||
|     const Py_UNICODE ch         /* Unicode character */ | ||||
|     const Py_UCS4 ch         /* Unicode character */ | ||||
|     ); | ||||
| 
 | ||||
| PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToLowercase( | ||||
|     Py_UNICODE ch       /* Unicode character */ | ||||
| PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase( | ||||
|     Py_UCS4 ch       /* Unicode character */ | ||||
|     ); | ||||
| 
 | ||||
| PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToUppercase( | ||||
|     Py_UNICODE ch       /* Unicode character */ | ||||
| PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase( | ||||
|     Py_UCS4 ch       /* Unicode character */ | ||||
|     ); | ||||
| 
 | ||||
| PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToTitlecase( | ||||
|     Py_UNICODE ch       /* Unicode character */ | ||||
| PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase( | ||||
|     Py_UCS4 ch       /* Unicode character */ | ||||
|     ); | ||||
| 
 | ||||
| PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit( | ||||
|     Py_UNICODE ch       /* Unicode character */ | ||||
|     Py_UCS4 ch       /* Unicode character */ | ||||
|     ); | ||||
| 
 | ||||
| PyAPI_FUNC(int) _PyUnicode_ToDigit( | ||||
|     Py_UNICODE ch       /* Unicode character */ | ||||
|     Py_UCS4 ch       /* Unicode character */ | ||||
|     ); | ||||
| 
 | ||||
| PyAPI_FUNC(double) _PyUnicode_ToNumeric( | ||||
|     Py_UNICODE ch       /* Unicode character */ | ||||
|     Py_UCS4 ch       /* Unicode character */ | ||||
|     ); | ||||
| 
 | ||||
| PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit( | ||||
|     Py_UNICODE ch       /* Unicode character */ | ||||
|     Py_UCS4 ch       /* Unicode character */ | ||||
|     ); | ||||
| 
 | ||||
| PyAPI_FUNC(int) _PyUnicode_IsDigit( | ||||
|     Py_UNICODE ch       /* Unicode character */ | ||||
|     Py_UCS4 ch       /* Unicode character */ | ||||
|     ); | ||||
| 
 | ||||
| PyAPI_FUNC(int) _PyUnicode_IsNumeric( | ||||
|     Py_UNICODE ch       /* Unicode character */ | ||||
|     Py_UCS4 ch       /* Unicode character */ | ||||
|     ); | ||||
| 
 | ||||
| PyAPI_FUNC(int) _PyUnicode_IsPrintable( | ||||
|     Py_UNICODE ch       /* Unicode character */ | ||||
|     Py_UCS4 ch       /* Unicode character */ | ||||
|     ); | ||||
| 
 | ||||
| PyAPI_FUNC(int) _PyUnicode_IsAlpha( | ||||
|     Py_UNICODE ch       /* Unicode character */ | ||||
|     Py_UCS4 ch       /* Unicode character */ | ||||
|     ); | ||||
| 
 | ||||
| PyAPI_FUNC(size_t) Py_UNICODE_strlen( | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Amaury Forgeot d'Arc
						Amaury Forgeot d'Arc