| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  | # Tests the attempted automatic coercion of the C locale to a UTF-8 locale | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import unittest | 
					
						
							| 
									
										
										
										
											2017-06-30 00:48:14 +10:00
										 |  |  | import locale | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  | import os | 
					
						
							|  |  |  | import sys | 
					
						
							|  |  |  | import sysconfig | 
					
						
							|  |  |  | import shutil | 
					
						
							|  |  |  | from collections import namedtuple | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import test.support | 
					
						
							|  |  |  | from test.support.script_helper import ( | 
					
						
							|  |  |  |     run_python_until_end, | 
					
						
							|  |  |  |     interpreter_requires_environment, | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-12-16 21:51:19 +13:00
# Set the list of ways we expect to be able to ask for the "C" locale
# (the platform specific overrides below may extend this list)
EXPECTED_C_LOCALE_EQUIVALENTS = ["C", "invalid.ascii"]

# Set our expectation for the default encoding used in the C locale
# for the filesystem encoding and the standard streams
# (both default to ASCII and are rebound per platform below)
EXPECTED_C_LOCALE_STREAM_ENCODING = "ascii"
EXPECTED_C_LOCALE_FS_ENCODING = "ascii"

# Set our expectation for the default locale used when none is specified
EXPECT_COERCION_IN_DEFAULT_LOCALE = True

# Apply some platform dependent overrides
if sys.platform.startswith("linux"):
    if test.support.is_android:
        # Android defaults to using UTF-8 for all system interfaces
        EXPECTED_C_LOCALE_STREAM_ENCODING = "utf-8"
        EXPECTED_C_LOCALE_FS_ENCODING = "utf-8"
    else:
        # Linux distros typically alias the POSIX locale directly to the C
        # locale.
        # TODO: Once https://bugs.python.org/issue30672 is addressed, we'll be
        #       able to check this case unconditionally
        EXPECTED_C_LOCALE_EQUIVALENTS.append("POSIX")
elif sys.platform.startswith("aix"):
    # AIX uses iso8859-1 in the C locale, other *nix platforms use ASCII
    EXPECTED_C_LOCALE_STREAM_ENCODING = "iso8859-1"
    EXPECTED_C_LOCALE_FS_ENCODING = "iso8859-1"
elif sys.platform == "darwin":
    # FS encoding is UTF-8 on macOS (the stream encoding expectation is
    # left at its default)
    EXPECTED_C_LOCALE_FS_ENCODING = "utf-8"
elif sys.platform == "cygwin":
    # Cygwin defaults to using C.UTF-8
    # TODO: Work out a robust dynamic test for this that doesn't rely on
    #       CPython's own locale handling machinery
    EXPECT_COERCION_IN_DEFAULT_LOCALE = False
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # Note that the above expectations are still wrong in some cases, such as: | 
					
						
							| 
									
										
										
										
											2017-06-13 22:49:44 +10:00
										 |  |  | # * Windows when PYTHONLEGACYWINDOWSFSENCODING is set | 
					
						
							| 
									
										
										
										
											2017-12-16 21:51:19 +13:00
										 |  |  | # * Any platform other than AIX that uses latin-1 in the C locale | 
					
						
							|  |  |  | # * Any Linux distro where POSIX isn't a simple alias for the C locale | 
					
						
							|  |  |  | # * Any Linux distro where the default locale is something other than "C" | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  | # | 
					
						
							|  |  |  | # Options for dealing with this: | 
					
						
							| 
									
										
										
										
											2017-12-16 21:51:19 +13:00
										 |  |  | # * Don't set the PY_COERCE_C_LOCALE preprocessor definition on | 
					
						
							|  |  |  | #   such platforms (e.g. it isn't set on Windows) | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  | # * Fix the test expectations to match the actual platform behaviour | 
					
						
							| 
									
										
										
										
											2017-06-13 22:49:44 +10:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
# In order to get the warning messages to match up as expected, the candidate
# order here must match the target locale order in Python/pylifecycle.c
_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8", "UTF-8")
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  | 
 | 
					
						
							|  |  |  | # There's no reliable cross-platform way of checking locale alias | 
					
						
							|  |  |  | # lists, so the only way of knowing which of these locales will work | 
					
						
							|  |  |  | # is to try them with locale.setlocale(). We do that in a subprocess | 
					
						
							| 
									
										
										
										
											2017-12-16 21:51:19 +13:00
										 |  |  | # in setUpModule() below to avoid altering the locale of the test runner. | 
					
						
							| 
									
										
										
										
											2017-06-30 00:48:14 +10:00
										 |  |  | # | 
					
						
							|  |  |  | # If the relevant locale module attributes exist, and we're not on a platform | 
					
						
							|  |  |  | # where we expect it to always succeed, we also check that | 
					
						
							|  |  |  | # `locale.nl_langinfo(locale.CODESET)` works, as if it fails, the interpreter | 
					
						
							|  |  |  | # will skip locale coercion for that particular target locale | 
					
						
							|  |  |  | _check_nl_langinfo_CODESET = bool( | 
					
						
							|  |  |  |     sys.platform not in ("darwin", "linux") and | 
					
						
							|  |  |  |     hasattr(locale, "nl_langinfo") and | 
					
						
							|  |  |  |     hasattr(locale, "CODESET") | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  | def _set_locale_in_subprocess(locale_name): | 
					
						
							|  |  |  |     cmd_fmt = "import locale; print(locale.setlocale(locale.LC_CTYPE, '{}'))" | 
					
						
							| 
									
										
										
										
											2017-06-30 00:48:14 +10:00
										 |  |  |     if _check_nl_langinfo_CODESET: | 
					
						
							|  |  |  |         # If there's no valid CODESET, we expect coercion to be skipped | 
					
						
							|  |  |  |         cmd_fmt += "; import sys; sys.exit(not locale.nl_langinfo(locale.CODESET))" | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  |     cmd = cmd_fmt.format(locale_name) | 
					
						
							| 
									
										
										
										
											2017-12-16 04:54:22 +01:00
										 |  |  |     result, py_cmd = run_python_until_end("-c", cmd, PYTHONCOERCECLOCALE='') | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  |     return result.rc == 0 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-06-30 00:48:14 +10:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  | _fields = "fsencoding stdin_info stdout_info stderr_info lang lc_ctype lc_all" | 
					
						
							|  |  |  | _EncodingDetails = namedtuple("EncodingDetails", _fields) | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  | 
 | 
					
						
							|  |  |  | class EncodingDetails(_EncodingDetails): | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  |     # XXX (ncoghlan): Using JSON for child state reporting may be less fragile | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  |     CHILD_PROCESS_SCRIPT = ";".join([ | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  |         "import sys, os", | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  |         "print(sys.getfilesystemencoding())", | 
					
						
							|  |  |  |         "print(sys.stdin.encoding + ':' + sys.stdin.errors)", | 
					
						
							|  |  |  |         "print(sys.stdout.encoding + ':' + sys.stdout.errors)", | 
					
						
							|  |  |  |         "print(sys.stderr.encoding + ':' + sys.stderr.errors)", | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  |         "print(os.environ.get('LANG', 'not set'))", | 
					
						
							|  |  |  |         "print(os.environ.get('LC_CTYPE', 'not set'))", | 
					
						
							|  |  |  |         "print(os.environ.get('LC_ALL', 'not set'))", | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  |     ]) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @classmethod | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  |     def get_expected_details(cls, coercion_expected, fs_encoding, stream_encoding, env_vars): | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  |         """Returns expected child process details for a given encoding""" | 
					
						
							| 
									
										
										
										
											2017-06-15 19:11:39 +10:00
										 |  |  |         _stream = stream_encoding + ":{}" | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  |         # stdin and stdout should use surrogateescape either because the | 
					
						
							|  |  |  |         # coercion triggered, or because the C locale was detected | 
					
						
							|  |  |  |         stream_info = 2*[_stream.format("surrogateescape")] | 
					
						
							|  |  |  |         # stderr should always use backslashreplace | 
					
						
							|  |  |  |         stream_info.append(_stream.format("backslashreplace")) | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  |         expected_lang = env_vars.get("LANG", "not set").lower() | 
					
						
							|  |  |  |         if coercion_expected: | 
					
						
							|  |  |  |             expected_lc_ctype = CLI_COERCION_TARGET.lower() | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             expected_lc_ctype = env_vars.get("LC_CTYPE", "not set").lower() | 
					
						
							|  |  |  |         expected_lc_all = env_vars.get("LC_ALL", "not set").lower() | 
					
						
							|  |  |  |         env_info = expected_lang, expected_lc_ctype, expected_lc_all | 
					
						
							|  |  |  |         return dict(cls(fs_encoding, *stream_info, *env_info)._asdict()) | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  | 
 | 
					
						
							|  |  |  |     @staticmethod | 
					
						
							|  |  |  |     def _handle_output_variations(data): | 
					
						
							|  |  |  |         """Adjust the output to handle platform specific idiosyncrasies
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         * Some platforms report ASCII as ANSI_X3.4-1968 | 
					
						
							|  |  |  |         * Some platforms report ASCII as US-ASCII | 
					
						
							|  |  |  |         * Some platforms report UTF-8 instead of utf-8 | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         data = data.replace(b"ANSI_X3.4-1968", b"ascii") | 
					
						
							|  |  |  |         data = data.replace(b"US-ASCII", b"ascii") | 
					
						
							|  |  |  |         data = data.lower() | 
					
						
							|  |  |  |         return data | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @classmethod | 
					
						
							|  |  |  |     def get_child_details(cls, env_vars): | 
					
						
							|  |  |  |         """Retrieves fsencoding and standard stream details from a child process
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         Returns (encoding_details, stderr_lines): | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         - encoding_details: EncodingDetails for eager decoding | 
					
						
							|  |  |  |         - stderr_lines: result of calling splitlines() on the stderr output | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         The child is run in isolated mode if the current interpreter supports | 
					
						
							|  |  |  |         that. | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         result, py_cmd = run_python_until_end( | 
					
						
							| 
									
										
										
										
											2017-12-13 12:29:09 +01:00
										 |  |  |             "-X", "utf8=0", "-c", cls.CHILD_PROCESS_SCRIPT, | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  |             **env_vars | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         if not result.rc == 0: | 
					
						
							|  |  |  |             result.fail(py_cmd) | 
					
						
							|  |  |  |         # All subprocess outputs in this test case should be pure ASCII | 
					
						
							|  |  |  |         adjusted_output = cls._handle_output_variations(result.out) | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  |         stdout_lines = adjusted_output.decode("ascii").splitlines() | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  |         child_encoding_details = dict(cls(*stdout_lines)._asdict()) | 
					
						
							|  |  |  |         stderr_lines = result.err.decode("ascii").rstrip().splitlines() | 
					
						
							|  |  |  |         return child_encoding_details, stderr_lines | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
# Details of the shared library warning emitted at runtime
# NOTE(review): presumably this text has to match the interpreter's own
# message character for character -- confirm before rewording it.
LEGACY_LOCALE_WARNING = (
    "Python runtime initialized with LC_CTYPE=C (a locale with default ASCII "
    "encoding), which may cause Unicode compatibility problems. Using C.UTF-8, "
    "C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
    "locales is recommended."
)

# Details of the CLI locale coercion warning emitted at runtime
# The "{}" placeholder is filled in with the coercion target locale name
# by setUpModule()
CLI_COERCION_WARNING_FMT = (
    "Python detected LC_CTYPE=C: LC_CTYPE coerced to {} (set another locale "
    "or PYTHONCOERCECLOCALE=0 to disable this locale coercion behavior)."
)
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-06-13 13:32:31 +02:00
										 |  |  | AVAILABLE_TARGETS = None | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  | CLI_COERCION_TARGET = None | 
					
						
							|  |  |  | CLI_COERCION_WARNING = None | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-06-13 13:32:31 +02:00
										 |  |  | def setUpModule(): | 
					
						
							|  |  |  |     global AVAILABLE_TARGETS | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  |     global CLI_COERCION_TARGET | 
					
						
							|  |  |  |     global CLI_COERCION_WARNING | 
					
						
							| 
									
										
										
										
											2017-06-13 13:32:31 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if AVAILABLE_TARGETS is not None: | 
					
						
							|  |  |  |         # initialization already done | 
					
						
							|  |  |  |         return | 
					
						
							|  |  |  |     AVAILABLE_TARGETS = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Find the target locales available in the current system | 
					
						
							|  |  |  |     for target_locale in _C_UTF8_LOCALES: | 
					
						
							|  |  |  |         if _set_locale_in_subprocess(target_locale): | 
					
						
							|  |  |  |             AVAILABLE_TARGETS.append(target_locale) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  |     if AVAILABLE_TARGETS: | 
					
						
							|  |  |  |         # Coercion is expected to use the first available target locale | 
					
						
							|  |  |  |         CLI_COERCION_TARGET = AVAILABLE_TARGETS[0] | 
					
						
							|  |  |  |         CLI_COERCION_WARNING = CLI_COERCION_WARNING_FMT.format(CLI_COERCION_TARGET) | 
					
						
							| 
									
										
										
										
											2017-06-13 13:32:31 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  | class _LocaleHandlingTestCase(unittest.TestCase): | 
					
						
							|  |  |  |     # Base class to check expected locale handling behaviour | 
					
						
							| 
									
										
										
										
											2017-06-13 22:49:44 +10:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  |     def _check_child_encoding_details(self, | 
					
						
							|  |  |  |                                       env_vars, | 
					
						
							|  |  |  |                                       expected_fs_encoding, | 
					
						
							|  |  |  |                                       expected_stream_encoding, | 
					
						
							|  |  |  |                                       expected_warnings, | 
					
						
							|  |  |  |                                       coercion_expected): | 
					
						
							|  |  |  |         """Check the C locale handling for the given process environment
 | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  |         Parameters: | 
					
						
							|  |  |  |             expected_fs_encoding: expected sys.getfilesystemencoding() result | 
					
						
							|  |  |  |             expected_stream_encoding: expected encoding for standard streams | 
					
						
							|  |  |  |             expected_warning: stderr output to expect (if any) | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         result = EncodingDetails.get_child_details(env_vars) | 
					
						
							|  |  |  |         encoding_details, stderr_lines = result | 
					
						
							|  |  |  |         expected_details = EncodingDetails.get_expected_details( | 
					
						
							|  |  |  |             coercion_expected, | 
					
						
							|  |  |  |             expected_fs_encoding, | 
					
						
							|  |  |  |             expected_stream_encoding, | 
					
						
							|  |  |  |             env_vars | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         self.assertEqual(encoding_details, expected_details) | 
					
						
							|  |  |  |         if expected_warnings is None: | 
					
						
							|  |  |  |             expected_warnings = [] | 
					
						
							|  |  |  |         self.assertEqual(stderr_lines, expected_warnings) | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  | 
 | 
					
						
							|  |  |  | class LocaleConfigurationTests(_LocaleHandlingTestCase): | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  |     # Test explicit external configuration via the process environment | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-12-16 21:51:19 +13:00
										 |  |  |     @classmethod | 
					
						
							|  |  |  |     def setUpClass(cls): | 
					
						
							|  |  |  |         # This relies on setUpModule() having been run, so it can't be | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  |         # handled via the @unittest.skipUnless decorator | 
					
						
							|  |  |  |         if not AVAILABLE_TARGETS: | 
					
						
							|  |  |  |             raise unittest.SkipTest("No C-with-UTF-8 locale available") | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  |     def test_external_target_locale_configuration(self): | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  |         # Explicitly setting a target locale should give the same behaviour as | 
					
						
							|  |  |  |         # is seen when implicitly coercing to that target locale | 
					
						
							|  |  |  |         self.maxDiff = None | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-06-15 19:11:39 +10:00
										 |  |  |         expected_fs_encoding = "utf-8" | 
					
						
							|  |  |  |         expected_stream_encoding = "utf-8" | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  | 
 | 
					
						
							|  |  |  |         base_var_dict = { | 
					
						
							|  |  |  |             "LANG": "", | 
					
						
							|  |  |  |             "LC_CTYPE": "", | 
					
						
							|  |  |  |             "LC_ALL": "", | 
					
						
							| 
									
										
										
										
											2017-12-16 04:54:22 +01:00
										 |  |  |             "PYTHONCOERCECLOCALE": "", | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  |         } | 
					
						
							|  |  |  |         for env_var in ("LANG", "LC_CTYPE"): | 
					
						
							| 
									
										
										
										
											2017-06-13 13:32:31 +02:00
										 |  |  |             for locale_to_set in AVAILABLE_TARGETS: | 
					
						
							| 
									
										
										
										
											2017-06-13 22:49:44 +10:00
										 |  |  |                 # XXX (ncoghlan): LANG=UTF-8 doesn't appear to work as | 
					
						
							|  |  |  |                 #                 expected, so skip that combination for now | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  |                 # See https://bugs.python.org/issue30672 for discussion | 
					
						
							| 
									
										
										
										
											2017-06-13 22:49:44 +10:00
										 |  |  |                 if env_var == "LANG" and locale_to_set == "UTF-8": | 
					
						
							|  |  |  |                     continue | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  |                 with self.subTest(env_var=env_var, | 
					
						
							|  |  |  |                                   configured_locale=locale_to_set): | 
					
						
							|  |  |  |                     var_dict = base_var_dict.copy() | 
					
						
							|  |  |  |                     var_dict[env_var] = locale_to_set | 
					
						
							|  |  |  |                     self._check_child_encoding_details(var_dict, | 
					
						
							| 
									
										
										
										
											2017-06-15 19:11:39 +10:00
										 |  |  |                                                        expected_fs_encoding, | 
					
						
							|  |  |  |                                                        expected_stream_encoding, | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  |                                                        expected_warnings=None, | 
					
						
							|  |  |  |                                                        coercion_expected=False) | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @test.support.cpython_only | 
					
						
							|  |  |  | @unittest.skipUnless(sysconfig.get_config_var("PY_COERCE_C_LOCALE"), | 
					
						
							|  |  |  |                      "C locale coercion disabled at build time") | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  | class LocaleCoercionTests(_LocaleHandlingTestCase): | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  |     # Test implicit reconfiguration of the environment during CLI startup | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  |     def _check_c_locale_coercion(self, | 
					
						
							|  |  |  |                                  fs_encoding, stream_encoding, | 
					
						
							|  |  |  |                                  coerce_c_locale, | 
					
						
							|  |  |  |                                  expected_warnings=None, | 
					
						
							|  |  |  |                                  coercion_expected=True, | 
					
						
							|  |  |  |                                  **extra_vars): | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  |         """Check the C locale handling for various configurations
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         Parameters: | 
					
						
							| 
									
										
										
										
											2017-06-15 19:11:39 +10:00
										 |  |  |             fs_encoding: expected sys.getfilesystemencoding() result | 
					
						
							|  |  |  |             stream_encoding: expected encoding for standard streams | 
					
						
							|  |  |  |             coerce_c_locale: setting to use for PYTHONCOERCECLOCALE | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  |               None: don't set the variable at all | 
					
						
							|  |  |  |               str: the value set in the child's environment | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  |             expected_warnings: expected warning lines on stderr | 
					
						
							|  |  |  |             extra_vars: additional environment variables to set in subprocess | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  |         """
 | 
					
						
							|  |  |  |         self.maxDiff = None | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  |         if not AVAILABLE_TARGETS: | 
					
						
							|  |  |  |             # Locale coercion is disabled when there aren't any target locales | 
					
						
							| 
									
										
										
										
											2017-12-16 21:51:19 +13:00
										 |  |  |             fs_encoding = EXPECTED_C_LOCALE_FS_ENCODING | 
					
						
							|  |  |  |             stream_encoding = EXPECTED_C_LOCALE_STREAM_ENCODING | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  |             coercion_expected = False | 
					
						
							|  |  |  |             if expected_warnings: | 
					
						
							|  |  |  |                 expected_warnings = [LEGACY_LOCALE_WARNING] | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  | 
 | 
					
						
							|  |  |  |         base_var_dict = { | 
					
						
							|  |  |  |             "LANG": "", | 
					
						
							|  |  |  |             "LC_CTYPE": "", | 
					
						
							|  |  |  |             "LC_ALL": "", | 
					
						
							| 
									
										
										
										
											2017-12-16 04:54:22 +01:00
										 |  |  |             "PYTHONCOERCECLOCALE": "", | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  |         base_var_dict.update(extra_vars) | 
					
						
							| 
									
										
										
										
											2017-12-16 21:51:19 +13:00
										 |  |  |         if coerce_c_locale is not None: | 
					
						
							|  |  |  |             base_var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale | 
					
						
							| 
									
										
										
										
											2017-11-12 12:45:59 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-12-16 21:51:19 +13:00
										 |  |  |         # Check behaviour for the default locale | 
					
						
							|  |  |  |         with self.subTest(default_locale=True, | 
					
						
							|  |  |  |                           PYTHONCOERCECLOCALE=coerce_c_locale): | 
					
						
							|  |  |  |             if EXPECT_COERCION_IN_DEFAULT_LOCALE: | 
					
						
							| 
									
										
										
										
											2017-11-12 12:45:59 +01:00
										 |  |  |                 _expected_warnings = expected_warnings | 
					
						
							| 
									
										
										
										
											2017-12-16 21:51:19 +13:00
										 |  |  |                 _coercion_expected = coercion_expected | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 _expected_warnings = None | 
					
						
							|  |  |  |                 _coercion_expected = False | 
					
						
							|  |  |  |             # On Android CLI_COERCION_WARNING is not printed when all the | 
					
						
							|  |  |  |             # locale environment variables are undefined or empty. When | 
					
						
							|  |  |  |             # this code path is run with environ['LC_ALL'] == 'C', then | 
					
						
							|  |  |  |             # LEGACY_LOCALE_WARNING is printed. | 
					
						
							| 
									
										
										
										
											2018-06-26 02:11:06 +02:00
										 |  |  |             if (test.support.is_android and | 
					
						
							| 
									
										
										
										
											2017-12-16 21:51:19 +13:00
										 |  |  |                     _expected_warnings == [CLI_COERCION_WARNING]): | 
					
						
							|  |  |  |                 _expected_warnings = None | 
					
						
							|  |  |  |             self._check_child_encoding_details(base_var_dict, | 
					
						
							|  |  |  |                                                fs_encoding, | 
					
						
							|  |  |  |                                                stream_encoding, | 
					
						
							|  |  |  |                                                _expected_warnings, | 
					
						
							|  |  |  |                                                _coercion_expected) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # Check behaviour for explicitly configured locales | 
					
						
							|  |  |  |         for locale_to_set in EXPECTED_C_LOCALE_EQUIVALENTS: | 
					
						
							|  |  |  |             for env_var in ("LANG", "LC_CTYPE"): | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  |                 with self.subTest(env_var=env_var, | 
					
						
							|  |  |  |                                   nominal_locale=locale_to_set, | 
					
						
							|  |  |  |                                   PYTHONCOERCECLOCALE=coerce_c_locale): | 
					
						
							|  |  |  |                     var_dict = base_var_dict.copy() | 
					
						
							|  |  |  |                     var_dict[env_var] = locale_to_set | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  |                     # Check behaviour on successful coercion | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  |                     self._check_child_encoding_details(var_dict, | 
					
						
							| 
									
										
										
										
											2017-06-15 19:11:39 +10:00
										 |  |  |                                                        fs_encoding, | 
					
						
							|  |  |  |                                                        stream_encoding, | 
					
						
							| 
									
										
										
										
											2017-12-16 21:51:19 +13:00
										 |  |  |                                                        expected_warnings, | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  |                                                        coercion_expected) | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-12-16 21:51:19 +13:00
										 |  |  |     def test_PYTHONCOERCECLOCALE_not_set(self): | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  |         # This should coerce to the first available target locale by default | 
					
						
							| 
									
										
										
										
											2017-06-15 19:11:39 +10:00
										 |  |  |         self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=None) | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_PYTHONCOERCECLOCALE_not_zero(self): | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  |         # *Any* string other than "0" is considered "set" for our purposes | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  |         # and hence should result in the locale coercion being enabled | 
					
						
							|  |  |  |         for setting in ("", "1", "true", "false"): | 
					
						
							| 
									
										
										
										
											2017-06-15 19:11:39 +10:00
										 |  |  |             self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=setting) | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  |     def test_PYTHONCOERCECLOCALE_set_to_warn(self): | 
					
						
							|  |  |  |         # PYTHONCOERCECLOCALE=warn enables runtime warnings for legacy locales | 
					
						
							|  |  |  |         self._check_c_locale_coercion("utf-8", "utf-8", | 
					
						
							|  |  |  |                                       coerce_c_locale="warn", | 
					
						
							|  |  |  |                                       expected_warnings=[CLI_COERCION_WARNING]) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  |     def test_PYTHONCOERCECLOCALE_set_to_zero(self): | 
					
						
							|  |  |  |         # The setting "0" should result in the locale coercion being disabled | 
					
						
							| 
									
										
										
										
											2017-12-16 21:51:19 +13:00
										 |  |  |         self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING, | 
					
						
							|  |  |  |                                       EXPECTED_C_LOCALE_STREAM_ENCODING, | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  |                                       coerce_c_locale="0", | 
					
						
							|  |  |  |                                       coercion_expected=False) | 
					
						
							|  |  |  |         # Setting LC_ALL=C shouldn't make any difference to the behaviour | 
					
						
							| 
									
										
										
										
											2017-12-16 21:51:19 +13:00
										 |  |  |         self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING, | 
					
						
							|  |  |  |                                       EXPECTED_C_LOCALE_STREAM_ENCODING, | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  |                                       coerce_c_locale="0", | 
					
						
							|  |  |  |                                       LC_ALL="C", | 
					
						
							|  |  |  |                                       coercion_expected=False) | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  |     def test_LC_ALL_set_to_C(self): | 
					
						
							|  |  |  |         # Setting LC_ALL should render the locale coercion ineffective | 
					
						
							| 
									
										
										
										
											2017-12-16 21:51:19 +13:00
										 |  |  |         self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING, | 
					
						
							|  |  |  |                                       EXPECTED_C_LOCALE_STREAM_ENCODING, | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  |                                       coerce_c_locale=None, | 
					
						
							|  |  |  |                                       LC_ALL="C", | 
					
						
							|  |  |  |                                       coercion_expected=False) | 
					
						
							|  |  |  |         # And result in a warning about a lack of locale compatibility | 
					
						
							| 
									
										
										
										
											2017-12-16 21:51:19 +13:00
										 |  |  |         self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING, | 
					
						
							|  |  |  |                                       EXPECTED_C_LOCALE_STREAM_ENCODING, | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  |                                       coerce_c_locale="warn", | 
					
						
							|  |  |  |                                       LC_ALL="C", | 
					
						
							|  |  |  |                                       expected_warnings=[LEGACY_LOCALE_WARNING], | 
					
						
							|  |  |  |                                       coercion_expected=False) | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  | 
 | 
					
						
							|  |  |  | def test_main(): | 
					
						
							|  |  |  |     test.support.run_unittest( | 
					
						
							|  |  |  |         LocaleConfigurationTests, | 
					
						
							| 
									
										
										
										
											2017-06-18 12:29:42 +10:00
										 |  |  |         LocaleCoercionTests | 
					
						
							| 
									
										
										
										
											2017-06-11 13:16:15 +10:00
										 |  |  |     ) | 
					
						
							|  |  |  |     test.support.reap_children() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | if __name__ == "__main__": | 
					
						
							|  |  |  |     test_main() |