mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	bpo-30565: Add PYTHONCOERCECLOCALE=warn runtime flag (GH-2260)
- removes PY_WARN_ON_C_LOCALE build time flag - locale coercion and compatibility warnings are now always compiled in, but are off by default - adds PYTHONCOERCECLOCALE=warn runtime option to aid in debugging potentially locale related compatibility problems Due to not-yet-resolved test failures on *BSD systems (including Mac OS X), this also temporarily disables UTF-8 as a locale coercion target, and skips testing the interpreter's behavior in the POSIX locale.
This commit is contained in:
		
							parent
							
								
									6a98a04e21
								
							
						
					
					
						commit
						eb81795d7d
					
				
					 5 changed files with 184 additions and 122 deletions
				
			
		|  | @ -744,6 +744,11 @@ conflict. | |||
|    :data:`sys.stdin` and :data:`sys.stdout` to ``surrogateescape``. This | ||||
|    behavior can be overridden using :envvar:`PYTHONIOENCODING` as usual. | ||||
| 
 | ||||
|    For debugging purposes, setting ``PYTHONCOERCECLOCALE=warn`` will cause | ||||
|    Python to emit warning messages on ``stderr`` if either the locale coercion | ||||
|    activates, or else if a locale that *would* have triggered coercion is | ||||
|    still active when the Python runtime is initialized. | ||||
| 
 | ||||
|    Availability: \*nix | ||||
| 
 | ||||
|    .. versionadded:: 3.7 | ||||
|  |  | |||
|  | @ -96,20 +96,11 @@ defined coercion target locales (currently ``C.UTF-8``, ``C.utf8``, and | |||
| ``UTF-8``). The default error handler for ``stderr`` continues to be | ||||
| ``backslashreplace``, regardless of locale. | ||||
| 
 | ||||
| .. note:: | ||||
| 
 | ||||
|    In the current implementation, a warning message is printed directly to | ||||
|    ``stderr`` even for successful implicit locale coercion. This gives | ||||
|    redistributors and system integrators the opportunity to determine if they | ||||
|    should be making an environmental change to avoid the need for implicit | ||||
|    coercion at the Python interpreter level. | ||||
| 
 | ||||
|    However, it's not clear that this is going to be the best approach for | ||||
|    the final 3.7.0 release, and we may end up deciding to disable the warning | ||||
|    by default and provide some way of opting into it at runtime or build time. | ||||
| 
 | ||||
|    Concrete examples of use cases where it would be preferrable to disable the | ||||
|    warning by default can be noted on :issue:`30565`. | ||||
| Locale coercion is silent by default, but to assist in debugging potentially | ||||
| locale related integration problems, explicit warnings (emitted directly on | ||||
| ``stderr``) can be requested by setting ``PYTHONCOERCECLOCALE=warn``. This | ||||
| setting will also cause the Python runtime to emit a warning if the legacy C | ||||
| locale remains active when the core interpreter is initialized. | ||||
| 
 | ||||
| .. seealso:: | ||||
| 
 | ||||
|  |  | |||
|  | @ -22,13 +22,23 @@ | |||
| else: | ||||
|     C_LOCALE_FS_ENCODING = C_LOCALE_STREAM_ENCODING | ||||
| 
 | ||||
| # XXX (ncoghlan): The above is probably still wrong for: | ||||
| # Note that the above is probably still wrong in some cases, such as: | ||||
| # * Windows when PYTHONLEGACYWINDOWSFSENCODING is set | ||||
| # * AIX and any other platforms that use latin-1 in the C locale | ||||
| # | ||||
| # Options for dealing with this: | ||||
| # * Don't set PYTHON_COERCE_C_LOCALE on such platforms (e.g. Windows doesn't) | ||||
| # * Fix the test expectations to match the actual platform behaviour | ||||
| 
 | ||||
| # In order to get the warning messages to match up as expected, the candidate | ||||
| # order here must match the target locale order in Python/pylifecycle.c | ||||
| _C_UTF8_LOCALES = ("C.UTF-8", "C.utf8", "UTF-8") | ||||
| _C_UTF8_LOCALES = ("C.UTF-8", "C.utf8") #, "UTF-8") | ||||
| 
 | ||||
| # XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to | ||||
| #                 problems encountered on *BSD systems with those test cases | ||||
| # For additional details see: | ||||
| #     nl_langinfo CODESET error: https://bugs.python.org/issue30647 | ||||
| #     locale handling differences: https://bugs.python.org/issue30672 | ||||
| 
 | ||||
| # There's no reliable cross-platform way of checking locale alias | ||||
| # lists, so the only way of knowing which of these locales will work | ||||
|  | @ -40,20 +50,24 @@ def _set_locale_in_subprocess(locale_name): | |||
|     result, py_cmd = run_python_until_end("-c", cmd, __isolated=True) | ||||
|     return result.rc == 0 | ||||
| 
 | ||||
| _EncodingDetails = namedtuple("EncodingDetails", | ||||
|                               "fsencoding stdin_info stdout_info stderr_info") | ||||
| _fields = "fsencoding stdin_info stdout_info stderr_info lang lc_ctype lc_all" | ||||
| _EncodingDetails = namedtuple("EncodingDetails", _fields) | ||||
| 
 | ||||
| class EncodingDetails(_EncodingDetails): | ||||
|     # XXX (ncoghlan): Using JSON for child state reporting may be less fragile | ||||
|     CHILD_PROCESS_SCRIPT = ";".join([ | ||||
|         "import sys", | ||||
|         "import sys, os", | ||||
|         "print(sys.getfilesystemencoding())", | ||||
|         "print(sys.stdin.encoding + ':' + sys.stdin.errors)", | ||||
|         "print(sys.stdout.encoding + ':' + sys.stdout.errors)", | ||||
|         "print(sys.stderr.encoding + ':' + sys.stderr.errors)", | ||||
|         "print(os.environ.get('LANG', 'not set'))", | ||||
|         "print(os.environ.get('LC_CTYPE', 'not set'))", | ||||
|         "print(os.environ.get('LC_ALL', 'not set'))", | ||||
|     ]) | ||||
| 
 | ||||
|     @classmethod | ||||
|     def get_expected_details(cls, fs_encoding, stream_encoding): | ||||
|     def get_expected_details(cls, coercion_expected, fs_encoding, stream_encoding, env_vars): | ||||
|         """Returns expected child process details for a given encoding""" | ||||
|         _stream = stream_encoding + ":{}" | ||||
|         # stdin and stdout should use surrogateescape either because the | ||||
|  | @ -61,7 +75,14 @@ def get_expected_details(cls, fs_encoding, stream_encoding): | |||
|         stream_info = 2*[_stream.format("surrogateescape")] | ||||
|         # stderr should always use backslashreplace | ||||
|         stream_info.append(_stream.format("backslashreplace")) | ||||
|         return dict(cls(fs_encoding, *stream_info)._asdict()) | ||||
|         expected_lang = env_vars.get("LANG", "not set").lower() | ||||
|         if coercion_expected: | ||||
|             expected_lc_ctype = CLI_COERCION_TARGET.lower() | ||||
|         else: | ||||
|             expected_lc_ctype = env_vars.get("LC_CTYPE", "not set").lower() | ||||
|         expected_lc_all = env_vars.get("LC_ALL", "not set").lower() | ||||
|         env_info = expected_lang, expected_lc_ctype, expected_lc_all | ||||
|         return dict(cls(fs_encoding, *stream_info, *env_info)._asdict()) | ||||
| 
 | ||||
|     @staticmethod | ||||
|     def _handle_output_variations(data): | ||||
|  | @ -97,64 +118,20 @@ def get_child_details(cls, env_vars): | |||
|             result.fail(py_cmd) | ||||
|         # All subprocess outputs in this test case should be pure ASCII | ||||
|         adjusted_output = cls._handle_output_variations(result.out) | ||||
|         stdout_lines = adjusted_output.decode("ascii").rstrip().splitlines() | ||||
|         stdout_lines = adjusted_output.decode("ascii").splitlines() | ||||
|         child_encoding_details = dict(cls(*stdout_lines)._asdict()) | ||||
|         stderr_lines = result.err.decode("ascii").rstrip().splitlines() | ||||
|         return child_encoding_details, stderr_lines | ||||
| 
 | ||||
| 
 | ||||
| class _ChildProcessEncodingTestCase(unittest.TestCase): | ||||
|     # Base class to check for expected encoding details in a child process | ||||
| 
 | ||||
|     def _check_child_encoding_details(self, | ||||
|                                       env_vars, | ||||
|                                       expected_fs_encoding, | ||||
|                                       expected_stream_encoding, | ||||
|                                       expected_warning): | ||||
|         """Check the C locale handling for the given process environment | ||||
| 
 | ||||
|         Parameters: | ||||
|             expected_fs_encoding: expected sys.getfilesystemencoding() result | ||||
|             expected_stream_encoding: expected encoding for standard streams | ||||
|             expected_warning: stderr output to expect (if any) | ||||
|         """ | ||||
|         result = EncodingDetails.get_child_details(env_vars) | ||||
|         encoding_details, stderr_lines = result | ||||
|         self.assertEqual(encoding_details, | ||||
|                          EncodingDetails.get_expected_details( | ||||
|                              expected_fs_encoding, | ||||
|                              expected_stream_encoding)) | ||||
|         self.assertEqual(stderr_lines, expected_warning) | ||||
| 
 | ||||
| # Details of the shared library warning emitted at runtime | ||||
| LIBRARY_C_LOCALE_WARNING = ( | ||||
| LEGACY_LOCALE_WARNING = ( | ||||
|     "Python runtime initialized with LC_CTYPE=C (a locale with default ASCII " | ||||
|     "encoding), which may cause Unicode compatibility problems. Using C.UTF-8, " | ||||
|     "C.utf8, or UTF-8 (if available) as alternative Unicode-compatible " | ||||
|     "locales is recommended." | ||||
| ) | ||||
| 
 | ||||
| @unittest.skipUnless(sysconfig.get_config_var("PY_WARN_ON_C_LOCALE"), | ||||
|                      "C locale runtime warning disabled at build time") | ||||
| class LocaleWarningTests(_ChildProcessEncodingTestCase): | ||||
|     # Test warning emitted when running in the C locale | ||||
| 
 | ||||
|     def test_library_c_locale_warning(self): | ||||
|         self.maxDiff = None | ||||
|         for locale_to_set in ("C", "POSIX", "invalid.ascii"): | ||||
|             # XXX (ncoghlan): Mac OS X doesn't behave as expected in the | ||||
|             #                 POSIX locale, so we skip that for now | ||||
|             if sys.platform == "darwin" and locale_to_set == "POSIX": | ||||
|                 continue | ||||
|             var_dict = { | ||||
|                 "LC_ALL": locale_to_set | ||||
|             } | ||||
|             with self.subTest(forced_locale=locale_to_set): | ||||
|                 self._check_child_encoding_details(var_dict, | ||||
|                                                    C_LOCALE_FS_ENCODING, | ||||
|                                                    C_LOCALE_STREAM_ENCODING, | ||||
|                                                    [LIBRARY_C_LOCALE_WARNING]) | ||||
| 
 | ||||
| # Details of the CLI locale coercion warning emitted at runtime | ||||
| CLI_COERCION_WARNING_FMT = ( | ||||
|     "Python detected LC_CTYPE=C: LC_CTYPE coerced to {} (set another locale " | ||||
|  | @ -163,9 +140,13 @@ def test_library_c_locale_warning(self): | |||
| 
 | ||||
| 
 | ||||
| AVAILABLE_TARGETS = None | ||||
| CLI_COERCION_TARGET = None | ||||
| CLI_COERCION_WARNING = None | ||||
| 
 | ||||
| def setUpModule(): | ||||
|     global AVAILABLE_TARGETS | ||||
|     global CLI_COERCION_TARGET | ||||
|     global CLI_COERCION_WARNING | ||||
| 
 | ||||
|     if AVAILABLE_TARGETS is not None: | ||||
|         # initialization already done | ||||
|  | @ -177,26 +158,57 @@ def setUpModule(): | |||
|         if _set_locale_in_subprocess(target_locale): | ||||
|             AVAILABLE_TARGETS.append(target_locale) | ||||
| 
 | ||||
|     if AVAILABLE_TARGETS: | ||||
|         # Coercion is expected to use the first available target locale | ||||
|         CLI_COERCION_TARGET = AVAILABLE_TARGETS[0] | ||||
|         CLI_COERCION_WARNING = CLI_COERCION_WARNING_FMT.format(CLI_COERCION_TARGET) | ||||
| 
 | ||||
| 
 | ||||
| class _LocaleCoercionTargetsTestCase(_ChildProcessEncodingTestCase): | ||||
|     # Base class for test cases that rely on coercion targets being defined | ||||
| class _LocaleHandlingTestCase(unittest.TestCase): | ||||
|     # Base class to check expected locale handling behaviour | ||||
| 
 | ||||
|     @classmethod | ||||
|     def setUpClass(cls): | ||||
|     def _check_child_encoding_details(self, | ||||
|                                       env_vars, | ||||
|                                       expected_fs_encoding, | ||||
|                                       expected_stream_encoding, | ||||
|                                       expected_warnings, | ||||
|                                       coercion_expected): | ||||
|         """Check the C locale handling for the given process environment | ||||
| 
 | ||||
|         Parameters: | ||||
|             expected_fs_encoding: expected sys.getfilesystemencoding() result | ||||
|             expected_stream_encoding: expected encoding for standard streams | ||||
|             expected_warnings: stderr output to expect (if any) | ||||
|         """ | ||||
|         result = EncodingDetails.get_child_details(env_vars) | ||||
|         encoding_details, stderr_lines = result | ||||
|         expected_details = EncodingDetails.get_expected_details( | ||||
|             coercion_expected, | ||||
|             expected_fs_encoding, | ||||
|             expected_stream_encoding, | ||||
|             env_vars | ||||
|         ) | ||||
|         self.assertEqual(encoding_details, expected_details) | ||||
|         if expected_warnings is None: | ||||
|             expected_warnings = [] | ||||
|         self.assertEqual(stderr_lines, expected_warnings) | ||||
| 
 | ||||
| 
 | ||||
| class LocaleConfigurationTests(_LocaleHandlingTestCase): | ||||
|     # Test explicit external configuration via the process environment | ||||
| 
 | ||||
|     def setUpClass(): | ||||
|         # This relies on setUpModule() having been run, so it can't be | ||||
|         # handled via the @unittest.skipUnless decorator | ||||
|         if not AVAILABLE_TARGETS: | ||||
|             raise unittest.SkipTest("No C-with-UTF-8 locale available") | ||||
| 
 | ||||
| 
 | ||||
| class LocaleConfigurationTests(_LocaleCoercionTargetsTestCase): | ||||
|     # Test explicit external configuration via the process environment | ||||
| 
 | ||||
|     def test_external_target_locale_configuration(self): | ||||
| 
 | ||||
|         # Explicitly setting a target locale should give the same behaviour as | ||||
|         # is seen when implicitly coercing to that target locale | ||||
|         self.maxDiff = None | ||||
| 
 | ||||
|         expected_warning = [] | ||||
|         expected_fs_encoding = "utf-8" | ||||
|         expected_stream_encoding = "utf-8" | ||||
| 
 | ||||
|  | @ -209,6 +221,7 @@ def test_external_target_locale_configuration(self): | |||
|             for locale_to_set in AVAILABLE_TARGETS: | ||||
|                 # XXX (ncoghlan): LANG=UTF-8 doesn't appear to work as | ||||
|                 #                 expected, so skip that combination for now | ||||
|                 # See https://bugs.python.org/issue30672 for discussion | ||||
|                 if env_var == "LANG" and locale_to_set == "UTF-8": | ||||
|                     continue | ||||
| 
 | ||||
|  | @ -219,17 +232,23 @@ def test_external_target_locale_configuration(self): | |||
|                     self._check_child_encoding_details(var_dict, | ||||
|                                                        expected_fs_encoding, | ||||
|                                                        expected_stream_encoding, | ||||
|                                                        expected_warning) | ||||
|                                                        expected_warnings=None, | ||||
|                                                        coercion_expected=False) | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| @test.support.cpython_only | ||||
| @unittest.skipUnless(sysconfig.get_config_var("PY_COERCE_C_LOCALE"), | ||||
|                      "C locale coercion disabled at build time") | ||||
| class LocaleCoercionTests(_LocaleCoercionTargetsTestCase): | ||||
| class LocaleCoercionTests(_LocaleHandlingTestCase): | ||||
|     # Test implicit reconfiguration of the environment during CLI startup | ||||
| 
 | ||||
|     def _check_c_locale_coercion(self, fs_encoding, stream_encoding, coerce_c_locale): | ||||
|     def _check_c_locale_coercion(self, | ||||
|                                  fs_encoding, stream_encoding, | ||||
|                                  coerce_c_locale, | ||||
|                                  expected_warnings=None, | ||||
|                                  coercion_expected=True, | ||||
|                                  **extra_vars): | ||||
|         """Check the C locale handling for various configurations | ||||
| 
 | ||||
|         Parameters: | ||||
|  | @ -238,27 +257,31 @@ def _check_c_locale_coercion(self, fs_encoding, stream_encoding, coerce_c_locale | |||
|             coerce_c_locale: setting to use for PYTHONCOERCECLOCALE | ||||
|               None: don't set the variable at all | ||||
|               str: the value set in the child's environment | ||||
|             expected_warnings: expected warning lines on stderr | ||||
|             extra_vars: additional environment variables to set in subprocess | ||||
|         """ | ||||
| 
 | ||||
|         # Check for expected warning on stderr if C locale is coerced | ||||
|         self.maxDiff = None | ||||
| 
 | ||||
|         expected_warning = [] | ||||
|         if coerce_c_locale != "0": | ||||
|             # Expect coercion to use the first available locale | ||||
|             warning_msg = CLI_COERCION_WARNING_FMT.format(AVAILABLE_TARGETS[0]) | ||||
|             expected_warning.append(warning_msg) | ||||
|         if not AVAILABLE_TARGETS: | ||||
|             # Locale coercion is disabled when there aren't any target locales | ||||
|             fs_encoding = C_LOCALE_FS_ENCODING | ||||
|             stream_encoding = C_LOCALE_STREAM_ENCODING | ||||
|             coercion_expected = False | ||||
|             if expected_warnings: | ||||
|                 expected_warnings = [LEGACY_LOCALE_WARNING] | ||||
| 
 | ||||
|         base_var_dict = { | ||||
|             "LANG": "", | ||||
|             "LC_CTYPE": "", | ||||
|             "LC_ALL": "", | ||||
|         } | ||||
|         base_var_dict.update(extra_vars) | ||||
|         for env_var in ("LANG", "LC_CTYPE"): | ||||
|             for locale_to_set in ("", "C", "POSIX", "invalid.ascii"): | ||||
|                 # XXX (ncoghlan): Mac OS X doesn't behave as expected in the | ||||
|                 # XXX (ncoghlan): *BSD platforms don't behave as expected in the | ||||
|                 #                 POSIX locale, so we skip that for now | ||||
|                 if sys.platform == "darwin" and locale_to_set == "POSIX": | ||||
|                 # See https://bugs.python.org/issue30672 for discussion | ||||
|                 if locale_to_set == "POSIX": | ||||
|                     continue | ||||
|                 with self.subTest(env_var=env_var, | ||||
|                                   nominal_locale=locale_to_set, | ||||
|  | @ -267,33 +290,62 @@ def _check_c_locale_coercion(self, fs_encoding, stream_encoding, coerce_c_locale | |||
|                     var_dict[env_var] = locale_to_set | ||||
|                     if coerce_c_locale is not None: | ||||
|                         var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale | ||||
|                     # Check behaviour on successful coercion | ||||
|                     self._check_child_encoding_details(var_dict, | ||||
|                                                        fs_encoding, | ||||
|                                                        stream_encoding, | ||||
|                                                        expected_warning) | ||||
|                                                        expected_warnings, | ||||
|                                                        coercion_expected) | ||||
| 
 | ||||
|     def test_test_PYTHONCOERCECLOCALE_not_set(self): | ||||
|         # This should coerce to the first available target locale by default | ||||
|         self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=None) | ||||
| 
 | ||||
|     def test_PYTHONCOERCECLOCALE_not_zero(self): | ||||
|         # *Any* string other that "0" is considered "set" for our purposes | ||||
|         # *Any* string other than "0" is considered "set" for our purposes | ||||
|         # and hence should result in the locale coercion being enabled | ||||
|         for setting in ("", "1", "true", "false"): | ||||
|             self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=setting) | ||||
| 
 | ||||
|     def test_PYTHONCOERCECLOCALE_set_to_warn(self): | ||||
|         # PYTHONCOERCECLOCALE=warn enables runtime warnings for legacy locales | ||||
|         self._check_c_locale_coercion("utf-8", "utf-8", | ||||
|                                       coerce_c_locale="warn", | ||||
|                                       expected_warnings=[CLI_COERCION_WARNING]) | ||||
| 
 | ||||
| 
 | ||||
|     def test_PYTHONCOERCECLOCALE_set_to_zero(self): | ||||
|         # The setting "0" should result in the locale coercion being disabled | ||||
|         self._check_c_locale_coercion(C_LOCALE_FS_ENCODING, | ||||
|                                       C_LOCALE_STREAM_ENCODING, | ||||
|                                       coerce_c_locale="0") | ||||
|                                       coerce_c_locale="0", | ||||
|                                       coercion_expected=False) | ||||
|         # Setting LC_ALL=C shouldn't make any difference to the behaviour | ||||
|         self._check_c_locale_coercion(C_LOCALE_FS_ENCODING, | ||||
|                                       C_LOCALE_STREAM_ENCODING, | ||||
|                                       coerce_c_locale="0", | ||||
|                                       LC_ALL="C", | ||||
|                                       coercion_expected=False) | ||||
| 
 | ||||
|     def test_LC_ALL_set_to_C(self): | ||||
|         # Setting LC_ALL should render the locale coercion ineffective | ||||
|         self._check_c_locale_coercion(C_LOCALE_FS_ENCODING, | ||||
|                                       C_LOCALE_STREAM_ENCODING, | ||||
|                                       coerce_c_locale=None, | ||||
|                                       LC_ALL="C", | ||||
|                                       coercion_expected=False) | ||||
|         # And result in a warning about a lack of locale compatibility | ||||
|         self._check_c_locale_coercion(C_LOCALE_FS_ENCODING, | ||||
|                                       C_LOCALE_STREAM_ENCODING, | ||||
|                                       coerce_c_locale="warn", | ||||
|                                       LC_ALL="C", | ||||
|                                       expected_warnings=[LEGACY_LOCALE_WARNING], | ||||
|                                       coercion_expected=False) | ||||
| 
 | ||||
| def test_main(): | ||||
|     test.support.run_unittest( | ||||
|         LocaleConfigurationTests, | ||||
|         LocaleCoercionTests, | ||||
|         LocaleWarningTests | ||||
|         LocaleCoercionTests | ||||
|     ) | ||||
|     test.support.reap_children() | ||||
| 
 | ||||
|  |  | |||
|  | @ -105,10 +105,10 @@ static const char usage_6[] = | |||
| "   predictable seed.\n" | ||||
| "PYTHONMALLOC: set the Python memory allocators and/or install debug hooks\n" | ||||
| "   on Python memory allocators. Use PYTHONMALLOC=debug to install debug\n" | ||||
| "   hooks.\n"; | ||||
| static const char usage_7[] = | ||||
| "   hooks.\n" | ||||
| "PYTHONCOERCECLOCALE: if this variable is set to 0, it disables the locale\n" | ||||
| "   coercion behavior\n"; | ||||
| "   coercion behavior. Use PYTHONCOERCECLOCALE=warn to request display of\n" | ||||
| "   locale coercion and locale compatibility warnings on stderr.\n"; | ||||
| 
 | ||||
| static int | ||||
| usage(int exitcode, const wchar_t* program) | ||||
|  | @ -125,7 +125,6 @@ usage(int exitcode, const wchar_t* program) | |||
|         fprintf(f, usage_4, (wint_t)DELIM); | ||||
|         fprintf(f, usage_5, (wint_t)DELIM, PYTHONHOMEHELP); | ||||
|         fputs(usage_6, f); | ||||
|         fputs(usage_7, f); | ||||
|     } | ||||
|     return exitcode; | ||||
| } | ||||
|  |  | |||
|  | @ -356,6 +356,10 @@ _Py_LegacyLocaleDetected(void) | |||
| { | ||||
| #ifndef MS_WINDOWS | ||||
|     /* On non-Windows systems, the C locale is considered a legacy locale */ | ||||
|     /* XXX (ncoghlan): some platforms (notably Mac OS X) don't appear to treat
 | ||||
|      *                 the POSIX locale as a simple alias for the C locale, so | ||||
|      *                 we may also want to check for that explicitly. | ||||
|      */ | ||||
|     const char *ctype_loc = setlocale(LC_CTYPE, NULL); | ||||
|     return ctype_loc != NULL && strcmp(ctype_loc, "C") == 0; | ||||
| #else | ||||
|  | @ -364,6 +368,30 @@ _Py_LegacyLocaleDetected(void) | |||
| #endif | ||||
| } | ||||
| 
 | ||||
| static const char *_C_LOCALE_WARNING = | ||||
|     "Python runtime initialized with LC_CTYPE=C (a locale with default ASCII " | ||||
|     "encoding), which may cause Unicode compatibility problems. Using C.UTF-8, " | ||||
|     "C.utf8, or UTF-8 (if available) as alternative Unicode-compatible " | ||||
|     "locales is recommended.\n"; | ||||
| 
 | ||||
| static int | ||||
| _legacy_locale_warnings_enabled(void) | ||||
| { | ||||
|     const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE"); | ||||
|     return (coerce_c_locale != NULL && | ||||
|             strncmp(coerce_c_locale, "warn", 5) == 0); | ||||
| } | ||||
| 
 | ||||
| static void | ||||
| _emit_stderr_warning_for_legacy_locale(void) | ||||
| { | ||||
|     if (_legacy_locale_warnings_enabled()) { | ||||
|         if (_Py_LegacyLocaleDetected()) { | ||||
|             fprintf(stderr, "%s", _C_LOCALE_WARNING); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| typedef struct _CandidateLocale { | ||||
|     const char *locale_name; /* The locale to try as a coercion target */ | ||||
| } _LocaleCoercionTarget; | ||||
|  | @ -371,10 +399,17 @@ typedef struct _CandidateLocale { | |||
| static _LocaleCoercionTarget _TARGET_LOCALES[] = { | ||||
|     {"C.UTF-8"}, | ||||
|     {"C.utf8"}, | ||||
|     {"UTF-8"}, | ||||
|     /* {"UTF-8"}, */ | ||||
|     {NULL} | ||||
| }; | ||||
| 
 | ||||
| /* XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to
 | ||||
|  *                 problems encountered on *BSD systems with those test cases | ||||
|  * For additional details see: | ||||
|  *     nl_langinfo CODESET error: https://bugs.python.org/issue30647
 | ||||
|  *     locale handling differences: https://bugs.python.org/issue30672
 | ||||
|  */ | ||||
| 
 | ||||
| static char * | ||||
| get_default_standard_stream_error_handler(void) | ||||
| { | ||||
|  | @ -419,7 +454,9 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target) | |||
|                 "Error setting LC_CTYPE, skipping C locale coercion\n"); | ||||
|         return; | ||||
|     } | ||||
|     if (_legacy_locale_warnings_enabled()) { | ||||
|         fprintf(stderr, _C_LOCALE_COERCION_WARNING, newloc); | ||||
|     } | ||||
| 
 | ||||
|     /* Reconfigure with the overridden environment variables */ | ||||
|     setlocale(LC_ALL, ""); | ||||
|  | @ -465,26 +502,6 @@ _Py_CoerceLegacyLocale(void) | |||
| } | ||||
| 
 | ||||
| 
 | ||||
| #ifdef PY_WARN_ON_C_LOCALE | ||||
| static const char *_C_LOCALE_WARNING = | ||||
|     "Python runtime initialized with LC_CTYPE=C (a locale with default ASCII " | ||||
|     "encoding), which may cause Unicode compatibility problems. Using C.UTF-8, " | ||||
|     "C.utf8, or UTF-8 (if available) as alternative Unicode-compatible " | ||||
|     "locales is recommended.\n"; | ||||
| 
 | ||||
| static void | ||||
| _emit_stderr_warning_for_c_locale(void) | ||||
| { | ||||
|     const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE"); | ||||
|     if (coerce_c_locale == NULL || strncmp(coerce_c_locale, "0", 2) != 0) { | ||||
|         if (_Py_LegacyLocaleDetected()) { | ||||
|             fprintf(stderr, "%s", _C_LOCALE_WARNING); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| #endif | ||||
| 
 | ||||
| 
 | ||||
| /* Global initializations.  Can be undone by Py_Finalize().  Don't
 | ||||
|    call this twice without an intervening Py_Finalize() call. | ||||
| 
 | ||||
|  | @ -561,9 +578,7 @@ void _Py_InitializeCore(const _PyCoreConfig *config) | |||
|        the locale's charset without having to switch | ||||
|        locales. */ | ||||
|     setlocale(LC_CTYPE, ""); | ||||
| #ifdef PY_WARN_ON_C_LOCALE | ||||
|     _emit_stderr_warning_for_c_locale(); | ||||
| #endif | ||||
|     _emit_stderr_warning_for_legacy_locale(); | ||||
| #endif | ||||
| #endif | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Nick Coghlan
						Nick Coghlan