gh-74857, PEP 538: Coerce POSIX locale to UTF-8 based locale (#139238)

This commit is contained in:
Victor Stinner 2025-09-23 19:20:59 +02:00 committed by GitHub
parent a79ce35c70
commit e8382e55c5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 10 additions and 9 deletions

View file

@ -15,7 +15,7 @@
# Set the list of ways we expect to be able to ask for the "C" locale.
# 'invalid.ascii' is an invalid LOCALE name and so should get turned into the
# default locale, which is traditionally C.
EXPECTED_C_LOCALE_EQUIVALENTS = ["C", "invalid.ascii"]
EXPECTED_C_LOCALE_EQUIVALENTS = ["C", "POSIX", "invalid.ascii"]
# Set our expectation for the default encoding used in the C locale
# for the filesystem encoding and the standard streams
@ -55,11 +55,6 @@
# VxWorks defaults to using UTF-8 for all system interfaces
EXPECTED_C_LOCALE_STREAM_ENCODING = "utf-8"
EXPECTED_C_LOCALE_FS_ENCODING = "utf-8"
if sys.platform.startswith("linux"):
# Linux recognizes POSIX as a synonym for C. Python will always coerce
# if the locale is set to POSIX, but not all platforms will use the
# C locale encodings if POSIX is set, so we'll only test it on Linux.
EXPECTED_C_LOCALE_EQUIVALENTS.append("POSIX")
# Note that the above expectations are still wrong in some cases, such as:
# * Windows when PYTHONLEGACYWINDOWSFSENCODING is set
@ -467,8 +462,9 @@ def test_PYTHONCOERCECLOCALE_set_to_one(self):
loc = locale.setlocale(locale.LC_CTYPE, "")
except locale.Error as e:
self.skipTest(str(e))
if loc == "C":
self.skipTest("test requires LC_CTYPE locale different than C")
if loc in ("C", "POSIX"):
self.skipTest("test requires LC_CTYPE locale different "
"than C and POSIX")
if loc in TARGET_LOCALES :
self.skipTest("coerced LC_CTYPE locale: %s" % loc)

View file

@ -0,0 +1,2 @@
:pep:`538`: Coerce the POSIX locale to a UTF-8 based locale. Patch by Victor
Stinner.

View file

@ -209,7 +209,10 @@ _Py_LegacyLocaleDetected(int warn)
* we may also want to check for that explicitly.
*/
const char *ctype_loc = setlocale(LC_CTYPE, NULL);
return ctype_loc != NULL && strcmp(ctype_loc, "C") == 0;
if (ctype_loc == NULL) {
return 0;
}
return (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0);
#else
/* Windows uses code pages instead of locales, so no locale is legacy */
return 0;