mirror of
				https://github.com/python/cpython.git
				synced 2025-10-25 18:54:53 +00:00 
			
		
		
		
	bpo-34485: Add _PyCoreConfig.stdio_encoding (GH-8881)
* Add stdio_encoding and stdio_errors fields to _PyCoreConfig.
* Add unit tests on stdio_encoding and stdio_errors.
This commit is contained in:
		
							parent
							
								
									177d921c8c
								
							
						
					
					
						commit
						dfe0dc7453
					
				
					 6 changed files with 265 additions and 135 deletions
				
			
		|  | @ -184,27 +184,6 @@ get_codec_name(const char *encoding) | |||
|     return NULL; | ||||
| } | ||||
| 
 | ||||
| static _PyInitError | ||||
| get_locale_encoding(char **locale_encoding) | ||||
| { | ||||
| #ifdef MS_WINDOWS | ||||
|     char encoding[20]; | ||||
|     PyOS_snprintf(encoding, sizeof(encoding), "cp%d", GetACP()); | ||||
| #elif defined(__ANDROID__) | ||||
|     const char *encoding = "UTF-8"; | ||||
| #else | ||||
|     const char *encoding = nl_langinfo(CODESET); | ||||
|     if (!encoding || encoding[0] == '\0') { | ||||
|         return _Py_INIT_USER_ERR("failed to get the locale encoding: " | ||||
|                                  "nl_langinfo(CODESET) failed"); | ||||
|     } | ||||
| #endif | ||||
|     *locale_encoding = _PyMem_RawStrdup(encoding); | ||||
|     if (*locale_encoding == NULL) { | ||||
|         return _Py_INIT_NO_MEMORY(); | ||||
|     } | ||||
|     return _Py_INIT_OK(); | ||||
| } | ||||
| 
 | ||||
| static _PyInitError | ||||
| initimport(PyInterpreterState *interp, PyObject *sysmod) | ||||
|  | @ -340,35 +319,20 @@ static _LocaleCoercionTarget _TARGET_LOCALES[] = { | |||
|     {NULL} | ||||
| }; | ||||
| 
 | ||||
| static const char * | ||||
| get_stdio_errors(void) | ||||
| 
 | ||||
| int | ||||
| _Py_IsLocaleCoercionTarget(const char *ctype_loc) | ||||
| { | ||||
| #ifndef MS_WINDOWS | ||||
|     const char *ctype_loc = setlocale(LC_CTYPE, NULL); | ||||
|     if (ctype_loc != NULL) { | ||||
|         /* surrogateescape is the default in the legacy C and POSIX locales */ | ||||
|         if (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0) { | ||||
|             return "surrogateescape"; | ||||
|     const _LocaleCoercionTarget *target = NULL; | ||||
|     for (target = _TARGET_LOCALES; target->locale_name; target++) { | ||||
|         if (strcmp(ctype_loc, target->locale_name) == 0) { | ||||
|             return 1; | ||||
|         } | ||||
| 
 | ||||
| #ifdef PY_COERCE_C_LOCALE | ||||
|         /* surrogateescape is the default in locale coercion target locales */ | ||||
|         const _LocaleCoercionTarget *target = NULL; | ||||
|         for (target = _TARGET_LOCALES; target->locale_name; target++) { | ||||
|             if (strcmp(ctype_loc, target->locale_name) == 0) { | ||||
|                 return "surrogateescape"; | ||||
|             } | ||||
|         } | ||||
| #endif | ||||
|     } | ||||
| 
 | ||||
|     return "strict"; | ||||
| #else | ||||
|     /* On Windows, always use surrogateescape by default */ | ||||
|     return "surrogateescape"; | ||||
| #endif | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| #ifdef PY_COERCE_C_LOCALE | ||||
| static const char C_LOCALE_COERCION_WARNING[] = | ||||
|     "Python detected LC_CTYPE=C: LC_CTYPE coerced to %.20s (set another locale " | ||||
|  | @ -1533,8 +1497,10 @@ initfsencoding(PyInterpreterState *interp) | |||
|             Py_HasFileSystemDefaultEncoding = 1; | ||||
|         } | ||||
|         else { | ||||
|             extern _PyInitError _Py_get_locale_encoding(char **locale_encoding); | ||||
| 
 | ||||
|             char *locale_encoding; | ||||
|             _PyInitError err = get_locale_encoding(&locale_encoding); | ||||
|             _PyInitError err = _Py_get_locale_encoding(&locale_encoding); | ||||
|             if (_Py_INIT_FAILED(err)) { | ||||
|                 return err; | ||||
|             } | ||||
|  | @ -1740,13 +1706,16 @@ init_sys_streams(PyInterpreterState *interp) | |||
|     PyObject *std = NULL; | ||||
|     int fd; | ||||
|     PyObject * encoding_attr; | ||||
|     char *pythonioencoding = NULL; | ||||
|     const char *encoding, *errors; | ||||
|     char *locale_encoding = NULL; | ||||
|     char *codec_name = NULL; | ||||
|     _PyInitError res = _Py_INIT_OK(); | ||||
|     extern char *_Py_StandardStreamEncoding; | ||||
|     extern char *_Py_StandardStreamErrors; | ||||
|     _PyCoreConfig *config = &interp->core_config; | ||||
| 
 | ||||
|     char *codec_name = get_codec_name(config->stdio_encoding); | ||||
|     if (codec_name == NULL) { | ||||
|         return _Py_INIT_ERR("failed to get the Python codec name " | ||||
|                             "of the stdio encoding"); | ||||
|     } | ||||
|     PyMem_RawFree(config->stdio_encoding); | ||||
|     config->stdio_encoding = codec_name; | ||||
| 
 | ||||
|     /* Hack to avoid a nasty recursion issue when Python is invoked
 | ||||
|        in verbose mode: pre-import the Latin-1 and UTF-8 codecs */ | ||||
|  | @ -1778,85 +1747,15 @@ init_sys_streams(PyInterpreterState *interp) | |||
|     } | ||||
|     Py_DECREF(wrapper); | ||||
| 
 | ||||
|     encoding = _Py_StandardStreamEncoding; | ||||
|     errors = _Py_StandardStreamErrors; | ||||
|     if (!encoding || !errors) { | ||||
|         char *opt = Py_GETENV("PYTHONIOENCODING"); | ||||
|         if (opt && opt[0] != '\0') { | ||||
|             char *err; | ||||
|             pythonioencoding = _PyMem_Strdup(opt); | ||||
|             if (pythonioencoding == NULL) { | ||||
|                 PyErr_NoMemory(); | ||||
|                 goto error; | ||||
|             } | ||||
|             err = strchr(pythonioencoding, ':'); | ||||
|             if (err) { | ||||
|                 *err = '\0'; | ||||
|                 err++; | ||||
|                 if (!err[0]) { | ||||
|                     err = NULL; | ||||
|                 } | ||||
|             } | ||||
| 
 | ||||
|             /* Does PYTHONIOENCODING contain an encoding? */ | ||||
|             if (pythonioencoding[0]) { | ||||
|                 if (!encoding) { | ||||
|                     encoding = pythonioencoding; | ||||
|                 } | ||||
| 
 | ||||
|                 /* If the encoding is set but not the error handler,
 | ||||
|                    use "strict" error handler by default. | ||||
|                    PYTHONIOENCODING=latin1 behaves as | ||||
|                    PYTHONIOENCODING=latin1:strict. */ | ||||
|                 if (!err) { | ||||
|                     err = "strict"; | ||||
|                 } | ||||
|             } | ||||
| 
 | ||||
|             if (!errors && err != NULL) { | ||||
|                 errors = err; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         if (interp->core_config.utf8_mode) { | ||||
|             if (!encoding) { | ||||
|                 encoding = "utf-8"; | ||||
|             } | ||||
|             if (!errors) { | ||||
|                 errors = "surrogateescape"; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         if (!errors) { | ||||
|             /* Choose the default error handler based on the current locale */ | ||||
|             errors = get_stdio_errors(); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     if (encoding == NULL) { | ||||
|         _PyInitError err = get_locale_encoding(&locale_encoding); | ||||
|         if (_Py_INIT_FAILED(err)) { | ||||
|             return err; | ||||
|         } | ||||
|         encoding = locale_encoding; | ||||
|     } | ||||
| 
 | ||||
|     codec_name = get_codec_name(encoding); | ||||
|     if (codec_name == NULL) { | ||||
|         PyErr_SetString(PyExc_RuntimeError, | ||||
|                         "failed to get the Python codec name " | ||||
|                         "of stdio encoding"); | ||||
|         goto error; | ||||
|     } | ||||
|     encoding = codec_name; | ||||
| 
 | ||||
|     /* Set sys.stdin */ | ||||
|     fd = fileno(stdin); | ||||
|     /* Under some conditions stdin, stdout and stderr may not be connected
 | ||||
|      * and fileno() may point to an invalid file descriptor. For example | ||||
|      * GUI apps don't have valid standard streams by default. | ||||
|      */ | ||||
|     std = create_stdio(iomod, fd, 0, "<stdin>", encoding, errors); | ||||
|     std = create_stdio(iomod, fd, 0, "<stdin>", | ||||
|                        config->stdio_encoding, | ||||
|                        config->stdio_errors); | ||||
|     if (std == NULL) | ||||
|         goto error; | ||||
|     PySys_SetObject("__stdin__", std); | ||||
|  | @ -1865,7 +1764,9 @@ init_sys_streams(PyInterpreterState *interp) | |||
| 
 | ||||
|     /* Set sys.stdout */ | ||||
|     fd = fileno(stdout); | ||||
|     std = create_stdio(iomod, fd, 1, "<stdout>", encoding, errors); | ||||
|     std = create_stdio(iomod, fd, 1, "<stdout>", | ||||
|                        config->stdio_encoding, | ||||
|                        config->stdio_errors); | ||||
|     if (std == NULL) | ||||
|         goto error; | ||||
|     PySys_SetObject("__stdout__", std); | ||||
|  | @ -1875,7 +1776,9 @@ init_sys_streams(PyInterpreterState *interp) | |||
| #if 1 /* Disable this if you have trouble debugging bootstrap stuff */ | ||||
|     /* Set sys.stderr, replaces the preliminary stderr */ | ||||
|     fd = fileno(stderr); | ||||
|     std = create_stdio(iomod, fd, 1, "<stderr>", encoding, "backslashreplace"); | ||||
|     std = create_stdio(iomod, fd, 1, "<stderr>", | ||||
|                        config->stdio_encoding, | ||||
|                        "backslashreplace"); | ||||
|     if (std == NULL) | ||||
|         goto error; | ||||
| 
 | ||||
|  | @ -1911,9 +1814,6 @@ init_sys_streams(PyInterpreterState *interp) | |||
| done: | ||||
|     _Py_ClearStandardStreamEncoding(); | ||||
| 
 | ||||
|     PyMem_RawFree(locale_encoding); | ||||
|     PyMem_RawFree(codec_name); | ||||
|     PyMem_Free(pythonioencoding); | ||||
|     Py_XDECREF(bimod); | ||||
|     Py_XDECREF(iomod); | ||||
|     return res; | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Victor Stinner
						Victor Stinner