mirror of
				https://github.com/python/cpython.git
				synced 2025-10-25 18:54:53 +00:00 
			
		
		
		
	bpo-43667: Fix broken Unicode encoding in non-UTF locales on Solaris (GH-25096)
This commit is contained in:
		
							parent
							
								
									4908fae3d5
								
							
						
					
					
						commit
						9032cf5cb1
					
				
					 6 changed files with 194 additions and 0 deletions
				
			
		|  | @ -57,6 +57,10 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |||
| #include <windows.h> | ||||
| #endif | ||||
| 
 | ||||
| #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION | ||||
| #include "pycore_fileutils.h"     // _Py_LocaleUsesNonUnicodeWchar() | ||||
| #endif | ||||
| 
 | ||||
| /* Uncomment to display statistics on interned strings at exit
 | ||||
|    in _PyUnicode_ClearInterned(). */ | ||||
| /* #define INTERNED_STATS 1 */ | ||||
|  | @ -2217,6 +2221,20 @@ PyUnicode_FromWideChar(const wchar_t *u, Py_ssize_t size) | |||
|     if (size == 0) | ||||
|         _Py_RETURN_UNICODE_EMPTY(); | ||||
| 
 | ||||
| #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION | ||||
|     /* Oracle Solaris uses a non-Unicode internal wchar_t form in
 | ||||
|        non-Unicode locales, so the input must be converted to UCS-4 first. */ | ||||
|     if (_Py_LocaleUsesNonUnicodeWchar()) { | ||||
|         wchar_t* converted = _Py_DecodeNonUnicodeWchar(u, size); | ||||
|         if (!converted) { | ||||
|             return NULL; | ||||
|         } | ||||
|         PyObject *unicode = _PyUnicode_FromUCS4(converted, size); | ||||
|         PyMem_Free(converted); | ||||
|         return unicode; | ||||
|     } | ||||
| #endif | ||||
| 
 | ||||
|     /* Single character Unicode objects in the Latin-1 range are
 | ||||
|        shared when using this constructor */ | ||||
|     if (size == 1 && (Py_UCS4)*u < 256) | ||||
|  | @ -3295,6 +3313,17 @@ PyUnicode_AsWideChar(PyObject *unicode, | |||
|         res = size; | ||||
|     } | ||||
|     unicode_copy_as_widechar(unicode, w, size); | ||||
| 
 | ||||
| #if HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION | ||||
|     /* Oracle Solaris uses a non-Unicode internal wchar_t form in non-Unicode
 | ||||
|        locales, so the copied characters must be converted in place afterwards. */ | ||||
|     if (_Py_LocaleUsesNonUnicodeWchar()) { | ||||
|         if (_Py_EncodeNonUnicodeWchar_InPlace(w, size) < 0) { | ||||
|             return -1; | ||||
|         } | ||||
|     } | ||||
| #endif | ||||
| 
 | ||||
|     return res; | ||||
| } | ||||
| 
 | ||||
|  | @ -3321,6 +3350,17 @@ PyUnicode_AsWideCharString(PyObject *unicode, | |||
|         return NULL; | ||||
|     } | ||||
|     unicode_copy_as_widechar(unicode, buffer, buflen + 1); | ||||
| 
 | ||||
| #if HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION | ||||
|     /* Oracle Solaris uses a non-Unicode internal wchar_t form in non-Unicode
 | ||||
|        locales, so the copied characters must be converted in place afterwards. */ | ||||
|     if (_Py_LocaleUsesNonUnicodeWchar()) { | ||||
|         if (_Py_EncodeNonUnicodeWchar_InPlace(buffer, (buflen + 1)) < 0) { | ||||
|             return NULL; | ||||
|         } | ||||
|     } | ||||
| #endif | ||||
| 
 | ||||
|     if (size != NULL) { | ||||
|         *size = buflen; | ||||
|     } | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Jakub Kulík
						Jakub Kulík