mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	bpo-34589: Add -X coerce_c_locale command line option (GH-9378)
Add a new -X coerce_c_locale command line option to control C locale coercion (PEP 538).
This commit is contained in:
		
							parent
							
								
									7a0791b699
								
							
						
					
					
						commit
						dbdee0073c
					
				
					 8 changed files with 160 additions and 52 deletions
				
			
		|  | @ -438,13 +438,22 @@ Miscellaneous options | |||
|      * Set the :attr:`~sys.flags.dev_mode` attribute of :attr:`sys.flags` to | ||||
|        ``True`` | ||||
| 
 | ||||
|    * ``-X utf8`` enables UTF-8 mode for operating system interfaces, overriding | ||||
|    * ``-X utf8`` enables UTF-8 mode (:pep:`540`) for operating system interfaces, overriding | ||||
|      the default locale-aware mode. ``-X utf8=0`` explicitly disables UTF-8 | ||||
|      mode (even when it would otherwise activate automatically). | ||||
|      See :envvar:`PYTHONUTF8` for more details. | ||||
|    * ``-X pycache_prefix=PATH`` enables writing ``.pyc`` files to a parallel | ||||
|      tree rooted at the given directory instead of to the code tree. See also | ||||
|      :envvar:`PYTHONPYCACHEPREFIX`. | ||||
|    * ``-X coerce_c_locale`` or ``-X coerce_c_locale=1`` tries to coerce the C | ||||
|      locale (:pep:`538`). | ||||
|      ``-X coerce_c_locale=0`` skips coercing the legacy ASCII-based C and POSIX | ||||
|      locales to a more capable UTF-8 based alternative. | ||||
|      ``-X coerce_c_locale=warn`` will cause Python to emit warning messages on | ||||
|      ``stderr`` if either the locale coercion activates, or else if a locale | ||||
|      that *would* have triggered coercion is still active when the Python | ||||
|      runtime is initialized. | ||||
|      See :envvar:`PYTHONCOERCECLOCALE` for more details. | ||||
| 
 | ||||
|    It also allows passing arbitrary values and retrieving them through the | ||||
|    :data:`sys._xoptions` dictionary. | ||||
|  | @ -464,6 +473,9 @@ Miscellaneous options | |||
|    .. versionadded:: 3.7 | ||||
|       The ``-X importtime``, ``-X dev`` and ``-X utf8`` options. | ||||
| 
 | ||||
|    .. versionadded:: 3.7.1 | ||||
|       The ``-X coerce_c_locale`` option. | ||||
| 
 | ||||
|    .. versionadded:: 3.8 | ||||
|       The ``-X pycache_prefix`` option. | ||||
| 
 | ||||
|  | @ -850,6 +862,8 @@ conflict. | |||
|    order to force the interpreter to use ``ASCII`` instead of ``UTF-8`` for | ||||
|    system interfaces. | ||||
| 
 | ||||
|    Also available as the :option:`-X` ``coerce_c_locale`` option. | ||||
| 
 | ||||
|    Availability: \*nix | ||||
| 
 | ||||
|    .. versionadded:: 3.7 | ||||
|  |  | |||
|  | @ -2494,3 +2494,10 @@ versions, it respected an ill-defined subset of those environment variables, | |||
| while in Python 3.7.0 it didn't read any of them due to :issue:`34247`). If | ||||
| this behavior is unwanted, set :c:data:`Py_IgnoreEnvironmentFlag` to 1 before | ||||
| calling :c:func:`Py_Initialize`. | ||||
| 
 | ||||
| :c:func:`Py_Initialize` and :c:func:`Py_Main` cannot enable the C locale | ||||
| coercion (:pep:`538`) anymore: it is always disabled. It can now only be | ||||
| enabled by the Python program ("python3"). | ||||
| 
 | ||||
| New :option:`-X` ``coerce_c_locale`` command line option to control C locale | ||||
| coercion (:pep:`538`). | ||||
|  |  | |||
|  | @ -139,7 +139,7 @@ def _handle_output_variations(data): | |||
|         return data | ||||
| 
 | ||||
|     @classmethod | ||||
|     def get_child_details(cls, env_vars): | ||||
|     def get_child_details(cls, env_vars, xoption=None): | ||||
|         """Retrieves fsencoding and standard stream details from a child process | ||||
| 
 | ||||
|         Returns (encoding_details, stderr_lines): | ||||
|  | @ -150,10 +150,11 @@ def get_child_details(cls, env_vars): | |||
|         The child is run in isolated mode if the current interpreter supports | ||||
|         that. | ||||
|         """ | ||||
|         result, py_cmd = run_python_until_end( | ||||
|             "-X", "utf8=0", "-c", cls.CHILD_PROCESS_SCRIPT, | ||||
|             **env_vars | ||||
|         ) | ||||
|         args = [] | ||||
|         if xoption: | ||||
|             args.extend(("-X", f"coerce_c_locale={xoption}")) | ||||
|         args.extend(("-X", "utf8=0", "-c", cls.CHILD_PROCESS_SCRIPT)) | ||||
|         result, py_cmd = run_python_until_end(*args, **env_vars) | ||||
|         if not result.rc == 0: | ||||
|             result.fail(py_cmd) | ||||
|         # All subprocess outputs in this test case should be pure ASCII | ||||
|  | @ -212,7 +213,8 @@ def _check_child_encoding_details(self, | |||
|                                       expected_fs_encoding, | ||||
|                                       expected_stream_encoding, | ||||
|                                       expected_warnings, | ||||
|                                       coercion_expected): | ||||
|                                       coercion_expected, | ||||
|                                       xoption=None): | ||||
|         """Check the C locale handling for the given process environment | ||||
| 
 | ||||
|         Parameters: | ||||
|  | @ -220,7 +222,7 @@ def _check_child_encoding_details(self, | |||
|             expected_stream_encoding: expected encoding for standard streams | ||||
|             expected_warnings: stderr output to expect (if any) | ||||
|         """ | ||||
|         result = EncodingDetails.get_child_details(env_vars) | ||||
|         result = EncodingDetails.get_child_details(env_vars, xoption) | ||||
|         encoding_details, stderr_lines = result | ||||
|         expected_details = EncodingDetails.get_expected_details( | ||||
|             coercion_expected, | ||||
|  | @ -290,6 +292,7 @@ def _check_c_locale_coercion(self, | |||
|                                  coerce_c_locale, | ||||
|                                  expected_warnings=None, | ||||
|                                  coercion_expected=True, | ||||
|                                  use_xoption=False, | ||||
|                                  **extra_vars): | ||||
|         """Check the C locale handling for various configurations | ||||
| 
 | ||||
|  | @ -319,7 +322,11 @@ def _check_c_locale_coercion(self, | |||
|             "PYTHONCOERCECLOCALE": "", | ||||
|         } | ||||
|         base_var_dict.update(extra_vars) | ||||
|         xoption = None | ||||
|         if coerce_c_locale is not None: | ||||
|             if use_xoption: | ||||
|                 xoption = coerce_c_locale | ||||
|             else: | ||||
|                 base_var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale | ||||
| 
 | ||||
|         # Check behaviour for the default locale | ||||
|  | @ -342,7 +349,8 @@ def _check_c_locale_coercion(self, | |||
|                                                fs_encoding, | ||||
|                                                stream_encoding, | ||||
|                                                _expected_warnings, | ||||
|                                                _coercion_expected) | ||||
|                                                _coercion_expected, | ||||
|                                                xoption=xoption) | ||||
| 
 | ||||
|         # Check behaviour for explicitly configured locales | ||||
|         for locale_to_set in EXPECTED_C_LOCALE_EQUIVALENTS: | ||||
|  | @ -357,7 +365,8 @@ def _check_c_locale_coercion(self, | |||
|                                                        fs_encoding, | ||||
|                                                        stream_encoding, | ||||
|                                                        expected_warnings, | ||||
|                                                        coercion_expected) | ||||
|                                                        coercion_expected, | ||||
|                                                        xoption=xoption) | ||||
| 
 | ||||
|     def test_PYTHONCOERCECLOCALE_not_set(self): | ||||
|         # This should coerce to the first available target locale by default | ||||
|  | @ -404,6 +413,32 @@ def test_LC_ALL_set_to_C(self): | |||
|                                       expected_warnings=[LEGACY_LOCALE_WARNING], | ||||
|                                       coercion_expected=False) | ||||
| 
 | ||||
|     def test_xoption_set_to_1(self): | ||||
|         self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale="1", | ||||
|                                       use_xoption=True) | ||||
| 
 | ||||
|     def test_xoption_set_to_zero(self): | ||||
|         # The setting "0" should result in the locale coercion being disabled | ||||
|         self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING, | ||||
|                                       EXPECTED_C_LOCALE_STREAM_ENCODING, | ||||
|                                       coerce_c_locale="0", | ||||
|                                       coercion_expected=False, | ||||
|                                       use_xoption=True) | ||||
|         # Setting LC_ALL=C shouldn't make any difference to the behaviour | ||||
|         self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING, | ||||
|                                       EXPECTED_C_LOCALE_STREAM_ENCODING, | ||||
|                                       coerce_c_locale="0", | ||||
|                                       LC_ALL="C", | ||||
|                                       coercion_expected=False, | ||||
|                                       use_xoption=True) | ||||
| 
 | ||||
|     def test_xoption_set_to_warn(self): | ||||
|         # -X coerce_c_locale=warn enables runtime warnings for legacy locales | ||||
|         self._check_c_locale_coercion("utf-8", "utf-8", | ||||
|                                       coerce_c_locale="warn", | ||||
|                                       expected_warnings=[CLI_COERCION_WARNING], | ||||
|                                       use_xoption=True) | ||||
| 
 | ||||
| def test_main(): | ||||
|     test.support.run_unittest( | ||||
|         LocaleConfigurationTests, | ||||
|  |  | |||
|  | @ -159,13 +159,16 @@ def test_undecodable_code(self): | |||
|         env = os.environ.copy() | ||||
|         # Use C locale to get ascii for the locale encoding | ||||
|         env['LC_ALL'] = 'C' | ||||
|         env['PYTHONCOERCECLOCALE'] = '0' | ||||
|         code = ( | ||||
|             b'import locale; ' | ||||
|             b'print(ascii("' + undecodable + b'"), ' | ||||
|                 b'locale.getpreferredencoding())') | ||||
|         p = subprocess.Popen( | ||||
|             [sys.executable, "-c", code], | ||||
|             [sys.executable, | ||||
|              # Disable C locale coercion and UTF-8 Mode to not use UTF-8 | ||||
|              "-X", "coerce_c_locale=0", | ||||
|              "-X", "utf8=0", | ||||
|              "-c", code], | ||||
|             stdout=subprocess.PIPE, stderr=subprocess.STDOUT, | ||||
|             env=env) | ||||
|         stdout, stderr = p.communicate() | ||||
|  |  | |||
|  | @ -656,9 +656,8 @@ def test_getfilesystemencoding(self): | |||
| 
 | ||||
|     def c_locale_get_error_handler(self, locale, isolated=False, encoding=None): | ||||
|         # Force the POSIX locale | ||||
|         env = os.environ.copy() | ||||
|         env = dict(os.environ) | ||||
|         env["LC_ALL"] = locale | ||||
|         env["PYTHONCOERCECLOCALE"] = "0" | ||||
|         code = '\n'.join(( | ||||
|             'import sys', | ||||
|             'def dump(name):', | ||||
|  | @ -668,7 +667,10 @@ def c_locale_get_error_handler(self, locale, isolated=False, encoding=None): | |||
|             'dump("stdout")', | ||||
|             'dump("stderr")', | ||||
|         )) | ||||
|         args = [sys.executable, "-X", "utf8=0", "-c", code] | ||||
|         args = [sys.executable, | ||||
|                 "-X", "utf8=0", | ||||
|                 "-X", "coerce_c_locale=0", | ||||
|                 "-c", code] | ||||
|         if isolated: | ||||
|             args.append("-I") | ||||
|         if encoding is not None: | ||||
|  |  | |||
|  | @ -27,6 +27,8 @@ def posix_locale(self): | |||
|         return (loc in POSIX_LOCALES) | ||||
| 
 | ||||
|     def get_output(self, *args, failure=False, **kw): | ||||
|         # Always disable the C locale coercion (PEP 538) | ||||
|         args = ('-X', 'coerce_c_locale=0', *args) | ||||
|         kw = dict(self.DEFAULT_ENV, **kw) | ||||
|         if failure: | ||||
|             out = assert_python_failure(*args, **kw) | ||||
|  | @ -116,7 +118,6 @@ def test_filesystemencoding(self): | |||
|             # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode | ||||
|             # and has the priority over -X utf8 and PYTHONUTF8 | ||||
|             out = self.get_output('-X', 'utf8', '-c', code, | ||||
|                                   PYTHONUTF8='strict', | ||||
|                                   PYTHONLEGACYWINDOWSFSENCODING='1') | ||||
|             self.assertEqual(out, 'mbcs/replace') | ||||
| 
 | ||||
|  |  | |||
|  | @ -0,0 +1,2 @@ | |||
| Add a new :option:`-X` ``coerce_c_locale`` command line option to control C | ||||
| locale coercion (:pep:`538`). | ||||
|  | @ -705,6 +705,17 @@ config_init_utf8_mode(_PyCoreConfig *config) | |||
|         return _Py_INIT_OK(); | ||||
|     } | ||||
| 
 | ||||
| #ifndef MS_WINDOWS | ||||
|     /* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */ | ||||
|     const char *ctype_loc = setlocale(LC_CTYPE, NULL); | ||||
|     if (ctype_loc != NULL | ||||
|         && (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0)) | ||||
|     { | ||||
|         config->utf8_mode = 1; | ||||
|         return _Py_INIT_OK(); | ||||
|     } | ||||
| #endif | ||||
| 
 | ||||
|     return _Py_INIT_OK(); | ||||
| } | ||||
| 
 | ||||
|  | @ -808,25 +819,6 @@ config_read_env_vars(_PyCoreConfig *config) | |||
|         config->malloc_stats = 1; | ||||
|     } | ||||
| 
 | ||||
|     const char *env = _PyCoreConfig_GetEnv(config, "PYTHONCOERCECLOCALE"); | ||||
|     if (env) { | ||||
|         if (strcmp(env, "0") == 0) { | ||||
|             if (config->_coerce_c_locale < 0) { | ||||
|                 config->_coerce_c_locale = 0; | ||||
|             } | ||||
|         } | ||||
|         else if (strcmp(env, "warn") == 0) { | ||||
|             if (config->_coerce_c_locale_warn < 0) { | ||||
|                 config->_coerce_c_locale_warn = 1; | ||||
|             } | ||||
|         } | ||||
|         else { | ||||
|             if (config->_coerce_c_locale < 0) { | ||||
|                 config->_coerce_c_locale = 1; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     wchar_t *path; | ||||
|     int res = _PyCoreConfig_GetEnvDup(config, &path, | ||||
|                                       L"PYTHONPATH", "PYTHONPATH"); | ||||
|  | @ -966,28 +958,76 @@ config_read_complex_options(_PyCoreConfig *config) | |||
| } | ||||
| 
 | ||||
| 
 | ||||
| static void | ||||
| config_init_locale(_PyCoreConfig *config) | ||||
| static _PyInitError | ||||
| config_init_coerce_c_locale(_PyCoreConfig *config) | ||||
| { | ||||
|     const wchar_t *xopt = config_get_xoption(config, L"coerce_c_locale"); | ||||
|     if (xopt) { | ||||
|         wchar_t *sep = wcschr(xopt, L'='); | ||||
|         if (sep) { | ||||
|             xopt = sep + 1; | ||||
|             if (wcscmp(xopt, L"1") == 0) { | ||||
|                 if (config->_coerce_c_locale < 0) { | ||||
|                     config->_coerce_c_locale = 1; | ||||
|                 } | ||||
|             } | ||||
|             else if (wcscmp(xopt, L"0") == 0) { | ||||
|                 if (config->_coerce_c_locale < 0) { | ||||
|                     config->_coerce_c_locale = 0; | ||||
|                 } | ||||
|             } | ||||
|             else if (wcscmp(xopt, L"warn") == 0) { | ||||
|                 if (config->_coerce_c_locale_warn < 0) { | ||||
|                     config->_coerce_c_locale_warn = 1; | ||||
|                 } | ||||
|             } | ||||
|             else { | ||||
|                 return _Py_INIT_USER_ERR("invalid -X coerce_c_locale option value"); | ||||
|             } | ||||
|         } | ||||
|         else { | ||||
|             if (config->_coerce_c_locale < 0) { | ||||
|         /* The C locale enables the C locale coercion (PEP 538) */ | ||||
|         if (_Py_LegacyLocaleDetected()) { | ||||
|                 config->_coerce_c_locale = 1; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
| #ifndef MS_WINDOWS | ||||
|     if (config->utf8_mode < 0) { | ||||
|         /* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */ | ||||
|         const char *ctype_loc = setlocale(LC_CTYPE, NULL); | ||||
|         if (ctype_loc != NULL | ||||
|            && (strcmp(ctype_loc, "C") == 0 | ||||
|                || strcmp(ctype_loc, "POSIX") == 0)) | ||||
|         { | ||||
|             config->utf8_mode = 1; | ||||
|         if (config->_coerce_c_locale_warn < 0) { | ||||
|             config->_coerce_c_locale_warn = 0; | ||||
|         } | ||||
|     } | ||||
| #endif | ||||
| 
 | ||||
|     const char *env = _PyCoreConfig_GetEnv(config, "PYTHONCOERCECLOCALE"); | ||||
|     if (env) { | ||||
|         if (strcmp(env, "0") == 0) { | ||||
|             if (config->_coerce_c_locale < 0) { | ||||
|                 config->_coerce_c_locale = 0; | ||||
|             } | ||||
|         } | ||||
|         else if (strcmp(env, "warn") == 0) { | ||||
|             if (config->_coerce_c_locale_warn < 0) { | ||||
|                 config->_coerce_c_locale_warn = 1; | ||||
|             } | ||||
|         } | ||||
|         else { | ||||
|             if (config->_coerce_c_locale < 0) { | ||||
|                 config->_coerce_c_locale = 1; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         if (config->_coerce_c_locale_warn < 0) { | ||||
|             config->_coerce_c_locale_warn = 0; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     if (config->_coerce_c_locale < 0) { | ||||
|         /* The C locale enables the C locale coercion (PEP 538) */ | ||||
|         if (_Py_LegacyLocaleDetected()) { | ||||
|             config->_coerce_c_locale = 1; | ||||
|             return _Py_INIT_OK(); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     return _Py_INIT_OK(); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
|  | @ -1293,8 +1333,11 @@ _PyCoreConfig_Read(_PyCoreConfig *config) | |||
|         } | ||||
|     } | ||||
| 
 | ||||
|     if (config->utf8_mode < 0 || config->_coerce_c_locale < 0) { | ||||
|         config_init_locale(config); | ||||
|     if (config->_coerce_c_locale < 0 || config->_coerce_c_locale_warn < 0) { | ||||
|         err = config_init_coerce_c_locale(config); | ||||
|         if (_Py_INIT_FAILED(err)) { | ||||
|             return err; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     if (config->_install_importlib) { | ||||
|  | @ -1349,6 +1392,7 @@ _PyCoreConfig_Read(_PyCoreConfig *config) | |||
|     } | ||||
| 
 | ||||
|     assert(config->_coerce_c_locale >= 0); | ||||
|     assert(config->_coerce_c_locale_warn >= 0); | ||||
|     assert(config->use_environment >= 0); | ||||
|     assert(config->filesystem_encoding != NULL); | ||||
|     assert(config->filesystem_errors != NULL); | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Victor Stinner
						Victor Stinner