2017-06-11 13:16:15 +10:00
|
|
|
# Tests the attempted automatic coercion of the C locale to a UTF-8 locale
|
|
|
|
|
2017-06-30 00:48:14 +10:00
|
|
|
import locale
|
2017-06-11 13:16:15 +10:00
|
|
|
import os
|
2018-11-30 11:34:47 +01:00
|
|
|
import subprocess
|
2017-06-11 13:16:15 +10:00
|
|
|
import sys
|
|
|
|
import sysconfig
|
2018-11-30 11:34:47 +01:00
|
|
|
import unittest
|
2017-06-11 13:16:15 +10:00
|
|
|
from collections import namedtuple
|
|
|
|
|
2018-11-21 12:21:25 +01:00
|
|
|
from test import support
|
2019-07-01 18:28:25 +02:00
|
|
|
from test.support.script_helper import run_python_until_end
|
|
|
|
|
2017-06-11 13:16:15 +10:00
|
|
|
|
gh-90548: Allow Alpine/MUSL to pass test_c_locale_coercion. (GH-134454)
Like cygwin, MUSL defaults to utf-8 if no variables are set. I have no
idea if the existing tests pass on cygwin, but I made the modifications
such that I shouldn't break it if is. The additional checks needed for
MUSL are guarded by DEFAULT_LOCALE_IS_C being False. Based on this
flag, we expect utf-8 for the encodings and no coercion message, as
long as LC_ALL is not set to C. (That looks like a bit of an issue with
the test structure, but I'm not going to attempt to "fix" that.)
DEFAULT_ENCODING is intentionally not given a default since it is only
used when DEFAULT_LOCALE_IS_C is False, and if you use the flag you'll
need to set it.
After reading through issue 30672, looking at the source, and running a
test on Android, I *think* the current situation is that coercion will
be done if the local is set to POSIX regardless of platform. However,
if the platform doesn't make POSIX equivalent to C, the encodings when
coercion is disabled will not be the same as for C (it is utf-8 on
android, for example). This means the tests would fail if POSIX were
added unconditionally to the EXPECTED_C_LOCALE_EQUIVALENTS as envisioned
in the issue. This *could* be fixed with another flag, but I'm not sure
it is worth the effort. I'm not even sure Python is behaving optimally
in this case (assuming my analysis is correct). So I just altered the
comment and add POSIX if and only if the platform is linux.
2025-09-06 15:01:11 -04:00
|
|
|
# Locale names through which the tests expect to be able to request the "C"
# locale. 'invalid.ascii' is not a valid locale name, so it should be turned
# into the default locale, which is traditionally C.
EXPECTED_C_LOCALE_EQUIVALENTS = ["C", "POSIX", "invalid.ascii"]

# Encodings expected in the C locale for the standard streams and for the
# filesystem
EXPECTED_C_LOCALE_STREAM_ENCODING = "ascii"
EXPECTED_C_LOCALE_FS_ENCODING = "ascii"

# Expectations for the locale that is used when none is specified
DEFAULT_LOCALE_IS_C = True
EXPECT_COERCION_IN_DEFAULT_LOCALE = True

TARGET_LOCALES = ["C.UTF-8", "C.utf8", "UTF-8"]

# Platform dependent overrides of the expectations above
if sys.platform == "android":
    # Android defaults to using UTF-8 for all system interfaces
    EXPECTED_C_LOCALE_STREAM_ENCODING = EXPECTED_C_LOCALE_FS_ENCODING = "utf-8"
elif support.linked_to_musl():
    # MUSL defaults to utf-8 unless the C locale is set explicitly.
    EXPECTED_C_LOCALE_EQUIVALENTS = ["C"]
    DEFAULT_LOCALE_IS_C = False
    # Only consulted when DEFAULT_LOCALE_IS_C is False, hence no default above
    DEFAULT_ENCODING = 'utf-8'
    EXPECT_COERCION_IN_DEFAULT_LOCALE = False
elif sys.platform.startswith("aix"):
    # AIX uses iso8859-1 in the C locale, other *nix platforms use ASCII
    EXPECTED_C_LOCALE_STREAM_ENCODING = EXPECTED_C_LOCALE_FS_ENCODING = "iso8859-1"
elif sys.platform == "darwin":
    # FS encoding is UTF-8 on macOS
    EXPECTED_C_LOCALE_FS_ENCODING = "utf-8"
elif sys.platform == "cygwin":
    # Cygwin defaults to using C.UTF-8
    # TODO: Work out a robust dynamic test for this that doesn't rely on
    #       CPython's own locale handling machinery
    EXPECT_COERCION_IN_DEFAULT_LOCALE = False
elif sys.platform == "vxworks":
    # VxWorks defaults to using UTF-8 for all system interfaces
    EXPECTED_C_LOCALE_STREAM_ENCODING = EXPECTED_C_LOCALE_FS_ENCODING = "utf-8"
|
2017-12-16 21:51:19 +13:00
|
|
|
|
|
|
|
# Note that the above expectations are still wrong in some cases, such as:
|
2017-06-13 22:49:44 +10:00
|
|
|
# * Windows when PYTHONLEGACYWINDOWSFSENCODING is set
|
2017-12-16 21:51:19 +13:00
|
|
|
# * Any platform other than AIX that uses latin-1 in the C locale
|
|
|
|
# * Any Linux distro where POSIX isn't a simple alias for the C locale
|
|
|
|
# * Any Linux distro where the default locale is something other than "C"
|
2017-06-18 12:29:42 +10:00
|
|
|
#
|
|
|
|
# Options for dealing with this:
|
2017-12-16 21:51:19 +13:00
|
|
|
# * Don't set the PY_COERCE_C_LOCALE preprocessor definition on
|
|
|
|
# such platforms (e.g. it isn't set on Windows)
|
2017-06-18 12:29:42 +10:00
|
|
|
# * Fix the test expectations to match the actual platform behaviour
|
2017-06-13 22:49:44 +10:00
|
|
|
|
2017-06-11 13:16:15 +10:00
|
|
|
# The candidate order here must match the target locale order in
# Python/pylifecycle.c, otherwise the warning messages will not match up
# as expected
_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8", "UTF-8")

# Locale alias lists can't be checked reliably across platforms, so the only
# way to find out which of these locales work is to try each one with
# locale.setlocale(). That is done in a subprocess from setUpModule() below,
# so the locale of the test runner itself is left untouched.
#
# When the relevant locale module attributes exist, and the platform is not
# one where the call is expected to always succeed, the probe also checks
# that `locale.nl_langinfo(locale.CODESET)` works: if it fails, the
# interpreter will skip locale coercion for that particular target locale
_check_nl_langinfo_CODESET = all((
    sys.platform not in ("darwin", "linux"),
    hasattr(locale, "nl_langinfo"),
    hasattr(locale, "CODESET"),
))
|
|
|
|
|
2017-06-11 13:16:15 +10:00
|
|
|
def _set_locale_in_subprocess(locale_name):
    """Report whether a child interpreter can switch LC_CTYPE to *locale_name*.

    The child runs ``locale.setlocale(locale.LC_CTYPE, locale_name)`` and,
    when ``_check_nl_langinfo_CODESET`` is true, exits with a non-zero status
    if ``locale.nl_langinfo(locale.CODESET)`` returns a false value.

    Returns True when the child exits with status 0.
    """
    statements = [
        "import locale; print(locale.setlocale(locale.LC_CTYPE, '{}'))",
    ]
    if _check_nl_langinfo_CODESET:
        # If there's no valid CODESET, we expect coercion to be skipped
        statements.append(
            "import sys; sys.exit(not locale.nl_langinfo(locale.CODESET))")
    child_code = "; ".join(statements).format(locale_name)
    outcome, _ = run_python_until_end("-c", child_code, PYTHONCOERCECLOCALE='')
    return outcome.rc == 0
|
|
|
|
|
2017-06-30 00:48:14 +10:00
|
|
|
|
|
|
|
|
2017-06-18 12:29:42 +10:00
|
|
|
_fields = "fsencoding stdin_info stdout_info stderr_info lang lc_ctype lc_all"
_EncodingDetails = namedtuple("EncodingDetails", _fields)


class EncodingDetails(_EncodingDetails):
    """Encoding and locale environment details reported by a child process.

    The fields are, in order: the filesystem encoding, an
    ``encoding:errors`` description for each of stdin, stdout and stderr,
    and the values of the LANG, LC_CTYPE and LC_ALL environment variables
    (``'not set'`` when a variable is absent).
    """

    # XXX (ncoghlan): Using JSON for child state reporting may be less fragile
    # The script prints one line per field, in field order.
    CHILD_PROCESS_SCRIPT = ";".join([
        "import sys, os",
        "print(sys.getfilesystemencoding())",
        "print(sys.stdin.encoding + ':' + sys.stdin.errors)",
        "print(sys.stdout.encoding + ':' + sys.stdout.errors)",
        "print(sys.stderr.encoding + ':' + sys.stderr.errors)",
        "print(os.environ.get('LANG', 'not set'))",
        "print(os.environ.get('LC_CTYPE', 'not set'))",
        "print(os.environ.get('LC_ALL', 'not set'))",
    ])

    @classmethod
    def get_expected_details(cls, coercion_expected, fs_encoding, stream_encoding, stream_errors, env_vars):
        """Returns expected child process details for a given encoding

        Parameters:
          coercion_expected: if true, LC_CTYPE is expected to hold the
            module-level CLI_COERCION_TARGET rather than the value passed
            in env_vars
          fs_encoding: expected sys.getfilesystemencoding() result
          stream_encoding: expected encoding of all three standard streams
          stream_errors: expected error handler for stdin and stdout;
            None selects "surrogateescape"
          env_vars: mapping of the environment variables given to the child

        Returns a plain dict keyed by the EncodingDetails field names.
        """
        if stream_errors is None:
            # stdin and stdout should use surrogateescape either because the
            # coercion triggered, or because the C locale was detected
            stream_errors = "surrogateescape"

        # Build the descriptions directly rather than via str.format() on a
        # template containing the encoding, so the encoding text is never
        # interpreted as a format specification.
        stdin_stdout_info = f"{stream_encoding}:{stream_errors}"
        # stderr should always use backslashreplace
        stderr_info = f"{stream_encoding}:backslashreplace"

        if coercion_expected:
            expected_lc_ctype = CLI_COERCION_TARGET
        else:
            expected_lc_ctype = env_vars.get("LC_CTYPE", "not set")

        expected = cls(
            fsencoding=fs_encoding,
            stdin_info=stdin_stdout_info,
            stdout_info=stdin_stdout_info,
            stderr_info=stderr_info,
            lang=env_vars.get("LANG", "not set"),
            lc_ctype=expected_lc_ctype,
            lc_all=env_vars.get("LC_ALL", "not set"),
        )
        return dict(expected._asdict())

    @classmethod
    def get_child_details(cls, env_vars):
        """Retrieves fsencoding and standard stream details from a child process

        Returns (encoding_details, stderr_lines):

        - encoding_details: dict keyed by the EncodingDetails field names,
          built from the lines the child printed on stdout
        - stderr_lines: result of calling splitlines() on the stderr output

        The child is started with ``-X utf8=0`` and with *env_vars* passed
        as keyword arguments to run_python_until_end().
        NOTE(review): an earlier version of this docstring said the child
        runs in isolated mode; nothing in this call requests that, so
        confirm against run_python_until_end() before relying on it.
        """
        result, py_cmd = run_python_until_end(
            "-X", "utf8=0", "-c", cls.CHILD_PROCESS_SCRIPT,
            **env_vars
        )
        if result.rc != 0:
            result.fail(py_cmd)
        # All subprocess outputs in this test case should be pure ASCII
        stdout_lines = result.out.decode("ascii").splitlines()
        child_encoding_details = dict(cls(*stdout_lines)._asdict())
        stderr_lines = result.err.decode("ascii").rstrip().splitlines()
        return child_encoding_details, stderr_lines
|
|
|
|
|
|
|
|
|
|
|
|
# Text of the shared library warning emitted at runtime
LEGACY_LOCALE_WARNING = (
    "Python runtime initialized with LC_CTYPE=C "
    "(a locale with default ASCII encoding), "
    "which may cause Unicode compatibility problems. "
    "Using C.UTF-8, C.utf8, or UTF-8 (if available) "
    "as alternative Unicode-compatible locales is recommended."
)

# Template of the CLI locale coercion warning emitted at runtime; the
# placeholder receives the name of the locale that was coerced to
CLI_COERCION_WARNING_FMT = (
    "Python detected LC_CTYPE=C: LC_CTYPE coerced to {} "
    "(set another locale or PYTHONCOERCECLOCALE=0 "
    "to disable this locale coercion behavior)."
)

# Filled in by setUpModule()
AVAILABLE_TARGETS = None
CLI_COERCION_TARGET = None
CLI_COERCION_WARNING = None
|
2017-06-11 13:16:15 +10:00
|
|
|
|
2017-06-13 13:32:31 +02:00
|
|
|
def setUpModule():
    """Probe the usable coercion target locales and cache the results.

    Populates AVAILABLE_TARGETS and, when at least one target locale works,
    CLI_COERCION_TARGET and CLI_COERCION_WARNING. Calling it again after
    AVAILABLE_TARGETS has been set is a no-op.
    """
    global AVAILABLE_TARGETS, CLI_COERCION_TARGET, CLI_COERCION_WARNING

    if AVAILABLE_TARGETS is not None:
        # initialization already done
        return

    # Keep the candidates, in order, that a child process manages to set
    AVAILABLE_TARGETS = []
    AVAILABLE_TARGETS.extend(
        filter(_set_locale_in_subprocess, _C_UTF8_LOCALES))

    if AVAILABLE_TARGETS:
        # Coercion is expected to use the first available target locale
        CLI_COERCION_TARGET = AVAILABLE_TARGETS[0]
        CLI_COERCION_WARNING = CLI_COERCION_WARNING_FMT.format(CLI_COERCION_TARGET)

    if support.verbose:
        # Dump the expectations the tests are going to run with
        for setting in ("AVAILABLE_TARGETS",
                        "EXPECTED_C_LOCALE_EQUIVALENTS",
                        "EXPECTED_C_LOCALE_STREAM_ENCODING",
                        "EXPECTED_C_LOCALE_FS_ENCODING",
                        "EXPECT_COERCION_IN_DEFAULT_LOCALE",
                        "_C_UTF8_LOCALES",
                        "_check_nl_langinfo_CODESET"):
            print(f"{setting} = {globals()[setting]!r}")
|
|
|
|
|
2017-06-13 13:32:31 +02:00
|
|
|
|
2017-06-18 12:29:42 +10:00
|
|
|
class _LocaleHandlingTestCase(unittest.TestCase):
    # Shared machinery for checking the expected locale handling behaviour

    def _check_child_encoding_details(self,
                                      env_vars,
                                      expected_fs_encoding,
                                      expected_stream_encoding,
                                      expected_stream_errors,
                                      expected_warnings,
                                      coercion_expected):
        """Check the C locale handling for the given process environment

        Parameters:
            env_vars: environment variables to run the child process with
            expected_fs_encoding: expected sys.getfilesystemencoding() result
            expected_stream_encoding: expected encoding for standard streams
            expected_stream_errors: passed on to
              EncodingDetails.get_expected_details() as stream_errors
            expected_warnings: stderr lines to expect (None means no output)
            coercion_expected: passed on to
              EncodingDetails.get_expected_details()
        """
        actual_details, stderr_lines = EncodingDetails.get_child_details(env_vars)
        wanted_details = EncodingDetails.get_expected_details(
            coercion_expected,
            expected_fs_encoding,
            expected_stream_encoding,
            expected_stream_errors,
            env_vars,
        )
        self.assertEqual(actual_details, wanted_details)
        wanted_stderr = [] if expected_warnings is None else expected_warnings
        self.assertEqual(stderr_lines, wanted_stderr)
|
2017-06-11 13:16:15 +10:00
|
|
|
|
2017-06-18 12:29:42 +10:00
|
|
|
|
|
|
|
class LocaleConfigurationTests(_LocaleHandlingTestCase):
    # Test explicit external configuration via the process environment

    @classmethod
    def setUpClass(cls):
        # AVAILABLE_TARGETS is only filled in once setUpModule() has run, so
        # this check can't be expressed with the @unittest.skipUnless
        # decorator
        if not AVAILABLE_TARGETS:
            raise unittest.SkipTest("No C-with-UTF-8 locale available")

    def _check_explicit_target_locales(self, io_encoding, stream_errors):
        # Run the child once per (variable, target locale) pair, with the
        # given PYTHONIOENCODING value and expected stdin/stdout error handler
        self.maxDiff = None

        clean_env = {
            "LANG": "",
            "LC_CTYPE": "",
            "LC_ALL": "",
            "PYTHONCOERCECLOCALE": "",
            "PYTHONIOENCODING": io_encoding,
        }
        for env_var in ("LANG", "LC_CTYPE"):
            for locale_to_set in AVAILABLE_TARGETS:
                # XXX (ncoghlan): LANG=UTF-8 doesn't appear to work as
                #                 expected, so skip that combination for now
                # See https://bugs.python.org/issue30672 for discussion
                if env_var == "LANG" and locale_to_set == "UTF-8":
                    continue

                with self.subTest(env_var=env_var,
                                  configured_locale=locale_to_set):
                    child_env = {**clean_env, env_var: locale_to_set}
                    self._check_child_encoding_details(
                        child_env,
                        "utf-8",    # expected filesystem encoding
                        "utf-8",    # expected stream encoding
                        expected_stream_errors=stream_errors,
                        expected_warnings=None,
                        coercion_expected=False)

    def test_external_target_locale_configuration(self):
        # Explicitly setting a target locale should give the same behaviour as
        # is seen when implicitly coercing to that target locale
        self._check_explicit_target_locales(io_encoding="",
                                            stream_errors=None)

    def test_with_ioencoding(self):
        # Same scenario with PYTHONIOENCODING=UTF-8 in the environment:
        # stdin and stdout are then expected to use the "strict" handler
        self._check_explicit_target_locales(io_encoding="UTF-8",
                                            stream_errors="strict")
|
2017-06-11 13:16:15 +10:00
|
|
|
|
2018-11-21 12:21:25 +01:00
|
|
|
@support.cpython_only
@unittest.skipUnless(sysconfig.get_config_var("PY_COERCE_C_LOCALE"),
                     "C locale coercion disabled at build time")
class LocaleCoercionTests(_LocaleHandlingTestCase):
    # Test implicit reconfiguration of the environment during CLI startup

    def _check_c_locale_coercion(self,
                                 fs_encoding, stream_encoding,
                                 coerce_c_locale,
                                 expected_warnings=None,
                                 coercion_expected=True,
                                 **extra_vars):
        """Check the C locale handling for various configurations

        Parameters:
            fs_encoding: expected sys.getfilesystemencoding() result
            stream_encoding: expected encoding for standard streams
            coerce_c_locale: setting to use for PYTHONCOERCECLOCALE
              None: leave the variable at the empty base value
              str: the value set in the child's environment
            expected_warnings: expected warning lines on stderr
            coercion_expected: whether LC_CTYPE is expected to be coerced
            extra_vars: additional environment variables to set in subprocess
        """
        self.maxDiff = None

        if not AVAILABLE_TARGETS:
            # Locale coercion is disabled when there aren't any target locales
            fs_encoding = EXPECTED_C_LOCALE_FS_ENCODING
            stream_encoding = EXPECTED_C_LOCALE_STREAM_ENCODING
            coercion_expected = False
            if expected_warnings:
                expected_warnings = [LEGACY_LOCALE_WARNING]

        child_env = {
            "LANG": "",
            "LC_CTYPE": "",
            "LC_ALL": "",
            "PYTHONCOERCECLOCALE": "",
            "PYTHONIOENCODING": "",
            **extra_vars,
        }
        if coerce_c_locale is not None:
            child_env["PYTHONCOERCECLOCALE"] = coerce_c_locale

        # Check behaviour for the default locale
        lc_all_given = 'LC_ALL' in extra_vars
        if DEFAULT_LOCALE_IS_C or lc_all_given:
            default_fs_encoding = fs_encoding
            default_stream_encoding = stream_encoding
        else:
            # The platform default is not the C locale and LC_ALL was not
            # passed in, so the platform's default encoding is expected
            default_fs_encoding = default_stream_encoding = DEFAULT_ENCODING

        with self.subTest(default_locale=True,
                          PYTHONCOERCECLOCALE=coerce_c_locale):
            if (EXPECT_COERCION_IN_DEFAULT_LOCALE
                    or (not DEFAULT_LOCALE_IS_C and lc_all_given)):
                default_warnings = expected_warnings
                default_coercion = coercion_expected
            else:
                default_warnings = None
                default_coercion = False
            # On Android CLI_COERCION_WARNING is not printed when all the
            # locale environment variables are undefined or empty. When
            # this code path is run with environ['LC_ALL'] == 'C', then
            # LEGACY_LOCALE_WARNING is printed.
            if (support.is_android and
                    default_warnings == [CLI_COERCION_WARNING]):
                default_warnings = None
            self._check_child_encoding_details(child_env,
                                               default_fs_encoding,
                                               default_stream_encoding,
                                               None,
                                               default_warnings,
                                               default_coercion)

        # Check behaviour for explicitly configured locales
        for locale_to_set in EXPECTED_C_LOCALE_EQUIVALENTS:
            for env_var in ("LANG", "LC_CTYPE"):
                with self.subTest(env_var=env_var,
                                  nominal_locale=locale_to_set,
                                  PYTHONCOERCECLOCALE=coerce_c_locale,
                                  PYTHONIOENCODING=""):
                    # Check behaviour on successful coercion
                    self._check_child_encoding_details(
                        {**child_env, env_var: locale_to_set},
                        fs_encoding,
                        stream_encoding,
                        None,
                        expected_warnings,
                        coercion_expected)

    def test_PYTHONCOERCECLOCALE_not_set(self):
        # This should coerce to the first available target locale by default
        self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=None)

    def test_PYTHONCOERCECLOCALE_not_zero(self):
        # *Any* string other than "0" is considered "set" for our purposes
        # and hence should result in the locale coercion being enabled
        for setting in ("", "1", "true", "false"):
            self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=setting)

    def test_PYTHONCOERCECLOCALE_set_to_warn(self):
        # PYTHONCOERCECLOCALE=warn enables runtime warnings for legacy locales
        self._check_c_locale_coercion("utf-8", "utf-8",
                                      coerce_c_locale="warn",
                                      expected_warnings=[CLI_COERCION_WARNING])

    def test_PYTHONCOERCECLOCALE_set_to_zero(self):
        # The setting "0" should result in the locale coercion being disabled,
        # and setting LC_ALL=C shouldn't make any difference to the behaviour
        for extra_env in ({}, {"LC_ALL": "C"}):
            self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
                                          EXPECTED_C_LOCALE_STREAM_ENCODING,
                                          coerce_c_locale="0",
                                          coercion_expected=False,
                                          **extra_env)

    def test_LC_ALL_set_to_C(self):
        # Setting LC_ALL should render the locale coercion ineffective, and
        # with PYTHONCOERCECLOCALE=warn it should also result in a warning
        # about a lack of locale compatibility
        scenarios = (
            (None, None),
            ("warn", [LEGACY_LOCALE_WARNING]),
        )
        for setting, wanted_warnings in scenarios:
            self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
                                          EXPECTED_C_LOCALE_STREAM_ENCODING,
                                          coerce_c_locale=setting,
                                          LC_ALL="C",
                                          expected_warnings=wanted_warnings,
                                          coercion_expected=False)

    def test_PYTHONCOERCECLOCALE_set_to_one(self):
        # skip the test if the LC_CTYPE locale is C or coerced
        saved_locale = locale.setlocale(locale.LC_CTYPE, None)
        self.addCleanup(locale.setlocale, locale.LC_CTYPE, saved_locale)
        try:
            loc = locale.setlocale(locale.LC_CTYPE, "")
        except locale.Error as exc:
            self.skipTest(str(exc))
        if loc in ("C", "POSIX"):
            self.skipTest("test requires LC_CTYPE locale different "
                          "than C and POSIX")
        if loc in TARGET_LOCALES:
            self.skipTest("coerced LC_CTYPE locale: %s" % loc)

        # bpo-35336: PYTHONCOERCECLOCALE=1 must not coerce the LC_CTYPE locale
        # if it's not equal to "C"
        code = 'import locale; print(locale.setlocale(locale.LC_CTYPE, None))'
        child_env = dict(os.environ, PYTHONCOERCECLOCALE='1')
        proc = subprocess.run([sys.executable, '-c', code],
                              stdout=subprocess.PIPE,
                              env=child_env,
                              text=True)
        self.assertEqual(proc.stdout.rstrip(), loc)
|
|
|
|
|
|
|
|
|
2021-09-19 15:27:33 +03:00
|
|
|
def tearDownModule():
    # Module-level teardown: delegate to support.reap_children()
    support.reap_children()
|
2017-06-11 13:16:15 +10:00
|
|
|
|
2021-09-19 15:27:33 +03:00
|
|
|
|
2017-06-11 13:16:15 +10:00
|
|
|
# Run the tests when this file is executed as a script
if __name__ == "__main__":
    unittest.main()
|